├── tests
    ├── .gitattributes
    ├── benchmarks
    │   ├── CMakeLists.txt
    │   ├── bench-minhash.c
    │   ├── bench-bloomfilter.c
    │   ├── bench-bitmap.c
    │   └── benchmark.h
    ├── examples
    │   ├── bf-uniq.t
    │   ├── hll-wc.t
    │   ├── example-bitmap.c
    │   ├── example-hyperloglog.c
    │   ├── example-bloomfilter.c
    │   ├── example-minhash.c
    │   ├── example-bitmap-rle.c
    │   ├── CMakeLists.txt
    │   ├── example-bloomfilter-a2.c
    │   ├── hll-wc.c
    │   └── bf-uniq.c
    ├── CMakeLists.txt
    ├── check
    │   ├── check.pc.in
    │   ├── CMakeLists.txt
    │   ├── check_error.h
    │   ├── check_print.h
    │   ├── check_msg.h
    │   ├── check_str.h
    │   ├── check_list.h
    │   ├── Makefile.am
    │   ├── check_pack.h
    │   ├── check_error.c
    │   ├── check_log.h
    │   ├── check_list.c
    │   ├── check_str.c
    │   ├── libcompat.h
    │   ├── check_impl.h
    │   └── check_print.c
    ├── test.h
    ├── test-minhash.c
    └── test-bloomfilter.c
├── docs
    ├── .gitattributes
    ├── tw_bitmap.2.md
    ├── tw_bitmap.2
    └── CMakeLists.txt
├── include
    ├── CMakeLists.txt
    ├── twiddle.h
    └── twiddle
    │   ├── utils
    │       ├── projection.h
    │       └── hash.h
    │   ├── hash
    │       └── minhash.h
    │   ├── hyperloglog
    │       └── hyperloglog.h
    │   └── bloomfilter
    │       ├── bloomfilter.h
    │       └── bloomfilter_a2.h
├── tools
    ├── docurium
    │   ├── Gemfile
    │   ├── api.docurium
    │   └── gen-doc
    ├── git
    │   └── hook-clang-format
    ├── cram
    │   └── ccram
    ├── cmake
    │   ├── FindVersion.cmake
    │   ├── FindLTO.cmake
    │   ├── FindParseArguments.cmake
    │   ├── FindOptions.cmake
    │   └── FindPrereqs.cmake
    └── travis
    │   └── test
├── .clang-format
├── src
    ├── twiddle
    │   ├── utils
    │   │   ├── hash.c
    │   │   ├── internal.h
    │   │   ├── murmur3.c
    │   │   └── metrohash.c
    │   ├── macrology.h
    │   ├── hyperloglog
    │   │   ├── hyperloglog_simd.c
    │   │   └── hyperloglog.c
    │   └── bloomfilter
    │   │   ├── bloomfilter.c
    │   │   └── bloomfilter_a2.c
    ├── libtwiddle.pc.in
    └── CMakeLists.txt
├── python
    ├── tests
    │   ├── test_bloomfilter_a2.py
    │   ├── test_minhash.py
    │   ├── test_hyperloglog.py
    │   ├── test_helpers.py
    │   ├── test_bloomfilter.py
    │   ├── test_bitmap_rle.py
    │   └── test_bitmap.py
    ├── twiddle
    │   ├── __init__.py
    │   ├── minhash.py
    │   ├── hyperloglog.py
    │   ├── bloomfilter.py
    │   ├── bitmap_rle.py
    │   ├── bloomfilter_a2.py
    │   └── bitmap.py
    └── setup.py
├── .gitignore
├── LICENSE
├── CMakeLists.txt
├── README.md
├── .travis.yml
├── CONTRIBUTING.md
└── EXAMPLES.md


/tests/.gitattributes:
--------------------------------------------------------------------------------
1 | *.t       -whitespace
2 | cram.py   -diff
3 | 


--------------------------------------------------------------------------------
/docs/.gitattributes:
--------------------------------------------------------------------------------
1 | *.graffle      -diff -whitespace
2 | /*.[1-9]       -diff -whitespace
3 | 


--------------------------------------------------------------------------------
/include/CMakeLists.txt:
--------------------------------------------------------------------------------
# Install the public headers. The trailing slash on the source directory makes
# CMake copy the directory's contents (not the directory itself) into
# <prefix>/include; only files matching "*.h" are installed, so this
# CMakeLists.txt is left out.
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/
    DESTINATION include
    FILES_MATCHING PATTERN "*.h")
4 | 


--------------------------------------------------------------------------------
/tests/benchmarks/CMakeLists.txt:
--------------------------------------------------------------------------------
# One benchmark per bench-*.c source in this directory.
# add_c_benchmark() is a project helper defined outside this file.
add_c_benchmark(bench-bitmap)
add_c_benchmark(bench-bloomfilter)
add_c_benchmark(bench-minhash)
4 | 


--------------------------------------------------------------------------------
/tools/docurium/Gemfile:
--------------------------------------------------------------------------------
 1 | source "http://rubygems.org"
 2 | 
 3 | platforms :rbx do
 4 |     gem 'rubysl', '~> 2.0'
 5 | end
 6 | 
 7 | gem 'docurium', :git => 'https://github.com/fsaintjacques/docurium.git', :branch => 'feature/group-command'
 8 | 
 9 | # vim:ft=ruby
10 | 


--------------------------------------------------------------------------------
/tools/docurium/api.docurium:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name":   "libtwiddle",
 3 |   "github": "fsaintjacques/libtwiddle",
 4 |   "input":  "include/twiddle",
 5 |   "prefix": "tw_",
 6 |   "branch": "gh-pages",
 7 |   "examples": "tests/examples",
 8 |   "versions": ["1.0.0"]
 9 | }
10 | 


--------------------------------------------------------------------------------
/tests/examples/bf-uniq.t:
--------------------------------------------------------------------------------
1 |   $ yes uuidgen | head -500 | xargs -L1 bash -c | sort > uuids
2 |   $ uniq uuids > uuids.uniq
3 |   $ cat uuids | bf-uniq > uuids.bf-uniq
4 |   $ diff uuids.uniq uuids.bf-uniq
5 |   $ (echo "a"; sleep 2; echo "a") | bf-uniq -d 1s
6 |   a
7 |   a
8 | 


--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
 1 | Language: Cpp
 2 | BasedOnStyle: LLVM
 3 | IndentWidth: 2
 4 | ColumnLimit: 80
 5 | UseTab: Never
 6 | BreakBeforeBraces: Linux
 7 | BinPackArguments: true
 8 | BinPackParameters: true
 9 | AllowShortIfStatementsOnASingleLine: false
10 | IndentCaseLabels: false
11 | 


--------------------------------------------------------------------------------
/tests/CMakeLists.txt:
--------------------------------------------------------------------------------
# Bundled copy of the Check unit-test framework.
add_subdirectory(check)

# Unit tests, one executable per test-*.c source.
# add_c_test() is a project helper defined outside this file.
add_c_test(test-bitmap)
add_c_test(test-bitmap-rle)
add_c_test(test-bloomfilter)
add_c_test(test-bloomfilter-a2)
add_c_test(test-hyperloglog)
add_c_test(test-minhash)

# Benchmarks and runnable examples live in their own subdirectories.
add_subdirectory(benchmarks)
add_subdirectory(examples)
12 | 


--------------------------------------------------------------------------------
/tests/examples/hll-wc.t:
--------------------------------------------------------------------------------
 1 |   $ yes uuidgen | head -500 | xargs -L1 bash -c | sort -u > uuids
 2 |   $ cat uuids | wc -l
 3 |   500
 4 |   $ echo $(((500 - $(cat uuids uuids uuids | hll-wc)) / 10))
 5 |   0
 6 |   $ (echo '1'; echo '2'; echo '3'; echo '1'; echo '2'; echo '3') | hll-wc -s
 7 |   1
 8 |   2
 9 |   3
10 |   3
11 |   3
12 |   3
13 | 


--------------------------------------------------------------------------------
/src/twiddle/utils/hash.c:
--------------------------------------------------------------------------------
 1 | #include <twiddle/utils/hash.h>
 2 | 
 3 | uint64_t tw_hash_128_64(tw_uint128_t x)
 4 | {
 5 |   // Murmur-inspired hashing.
 6 |   const uint64_t k = 0x9ddfea08eb392d69ULL;
 7 |   uint64_t a = (x.l ^ x.h) * k;
 8 |   a ^= (a >> 47);
 9 |   uint64_t b = (x.h ^ a) * k;
10 |   b ^= (b >> 47);
11 |   b *= k;
12 |   return b;
13 | }
14 | 


--------------------------------------------------------------------------------
/src/libtwiddle.pc.in:
--------------------------------------------------------------------------------
 1 | prefix=@prefix@
 2 | exec_prefix=${prefix}
 3 | libdir=${exec_prefix}/lib
 4 | includedir=${prefix}/include
 5 | sharedir=${prefix}/share
 6 | 
 7 | Name: libtwiddle
 8 | Description: library to help you twiddle bits.
 9 | Version: @VERSION@
10 | URL: https://github.com/fsaintjacques/libtwiddle
11 | Libs: -L${libdir} -ltwiddle -lm
12 | Cflags: -I${includedir}
13 | 


--------------------------------------------------------------------------------
/include/twiddle.h:
--------------------------------------------------------------------------------
 1 | #ifndef TWIDDLE_H
 2 | #define TWIDDLE_H
 3 | 
 4 | #include <twiddle/bitmap/bitmap.h>
 5 | #include <twiddle/bitmap/bitmap_rle.h>
 6 | 
 7 | #include <twiddle/bloomfilter/bloomfilter.h>
 8 | #include <twiddle/bloomfilter/bloomfilter_a2.h>
 9 | 
10 | #include <twiddle/hash/minhash.h>
11 | 
12 | #include <twiddle/hyperloglog/hyperloglog.h>
13 | 
14 | #endif /* TWIDDLE_H */
15 | 


--------------------------------------------------------------------------------
/tests/check/check.pc.in:
--------------------------------------------------------------------------------
 1 | prefix=@prefix@
 2 | exec_prefix=@exec_prefix@
 3 | libdir=@libdir@
 4 | includedir=@includedir@
 5 | 
 6 | Name: Check
 7 | Description: A unit test framework for C
 8 | URL: http://check.sourceforge.net
 9 | Version: @VERSION@
10 | Requires.private: @LIBSUBUNIT_PC@
11 | Libs: -L${libdir} -lcheck
12 | Libs.private: @GCOV_LIBS@ @PTHREAD_LIBS@ @LIBS@
13 | Cflags: -I${includedir} @PTHREAD_CFLAGS@
14 | 


--------------------------------------------------------------------------------
/python/tests/test_bloomfilter_a2.py:
--------------------------------------------------------------------------------
 1 | from hypothesis import given
 2 | from test_helpers import TwiddleTest, single_set, double_set
 3 | from twiddle import BloomFilterA2
 4 | 
class TestBloomFilterA2(TwiddleTest):
  # Property-based membership check for the a2 bloom filter wrapper.
  @given(single_set)
  def test_bloomfilter_a2(self, n_xs):
    # n_xs is the (n, xs) pair generated by test_helpers.single_set.
    n, xs = n_xs
    # NOTE(review): 8 and 0.5 are presumably the hash count and density
    # threshold (cf. the C example's tw_bloomfilter_a2_new(nbits, k, density))
    # -- confirm against the BloomFilterA2 constructor.
    bf = BloomFilterA2(n, 8, 0.5)

    # Every element must be reported as present right after it is set.
    for x in xs:
      bf.set(x)
      assert(x in bf)
14 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /build*/
 2 | /.build/
 3 | /tmp/
 4 | /python/.venv/
 5 | /python/.hypothesis/
 6 | /python/.cache/
 7 | 
 8 | 
 9 | libtwiddle.pc
10 | .gdb_history
11 | 
12 | tools/docurium/.bundle/
13 | tools/docurium/Gemfile.lock
14 | tools/docurium/bin/
15 | tools/docurium/vendor/
16 | tools/docurium/mkmf.log
17 | 
18 | /libtwiddle*.tar.gz
19 | /libtwiddle*.tar.bz2
20 | 
21 | *.[oa]
22 | *.os
23 | *.so
24 | *.dylib
25 | *.pyc
26 | 


--------------------------------------------------------------------------------
/python/twiddle/__init__.py:
--------------------------------------------------------------------------------
 1 | from bitmap         import Bitmap
 2 | from bitmap_rle     import BitmapRLE
 3 | from bloomfilter    import BloomFilter
 4 | from bloomfilter_a2 import BloomFilterA2
 5 | from hyperloglog    import HyperLogLog
 6 | from minhash        import MinHash
 7 | 
 8 | __all__ = [ 'Bitmap',
 9 |             'BitmapRLE',
10 |             'BloomFilter',
11 |             'BloomFilterA2',
12 |             'HyperLogLog',
13 |             'MinHash']
14 | 


--------------------------------------------------------------------------------
/tools/docurium/gen-doc:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Generate the API documentation with docurium's `cm doc`, installing the
# required gems on first use.

# Abort on the first failing command and on use of unset variables.
set -o errexit -o nounset

# Physical path of the `docurium` directory located under the parent of the
# directory that contains this script.
DOCU_HOME="$(cd "$(dirname "$(which "$0")")"/.. >/dev/null; pwd -P)"/docurium

# Install the Gemfile's gems into ./vendor, with binstubs written to ./bin.
# Note: inside this script the function name shadows the system `install`.
install() {
  bundler install --binstubs=bin/ --path vendor
}

# Run docurium against api.docurium, with LLVM_CONFIG set to llvm-config-3.8.
generate() {
  LLVM_CONFIG=llvm-config-3.8 bin/cm doc api.docurium
}

pushd "$DOCU_HOME"

# The `cm` binstub only exists once the gems have been installed.
if [ ! -e bin/cm ]; then
  install
fi

generate

popd
24 | 


--------------------------------------------------------------------------------
/tests/examples/example-bitmap.c:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <twiddle/bitmap/bitmap.h>
 3 | 
 4 | int main()
 5 | {
 6 |   const uint64_t nbits = 1024;
 7 |   struct tw_bitmap *bitmap = tw_bitmap_new(nbits);
 8 | 
 9 |   assert(bitmap);
10 | 
11 |   tw_bitmap_set(bitmap, 512);
12 |   assert(tw_bitmap_test_and_clear(bitmap, 512));
13 |   assert(!tw_bitmap_test(bitmap, 512));
14 | 
15 |   tw_bitmap_set(bitmap, 768);
16 |   assert(tw_bitmap_find_first_bit(bitmap) == 768);
17 | 
18 |   tw_bitmap_free(bitmap);
19 | 
20 |   return 0;
21 | }
22 | 


--------------------------------------------------------------------------------
/python/tests/test_minhash.py:
--------------------------------------------------------------------------------
 1 | from math import log
 2 | from hypothesis import given
 3 | from test_helpers import double_set
 4 | from twiddle import MinHash
 5 | from unittest import TestCase
 6 | 
class TestMinHash(TestCase):
  # Property-based check of the MinHash union operators (| and |=).
  # NOTE(review): the method name says "bloomfilter" but the body exercises
  # MinHash -- looks like a copy-paste leftover; confirm before renaming.
  @given(double_set)
  def test_bloomfilter_union(self, n_xs_ys):
    # n_xs_ys is the (n, xs, ys) triple generated by test_helpers.double_set.
    n, xs, ys = n_xs_ys
    x, y = MinHash.from_iterable(n, xs), MinHash.from_iterable(n, ys)

    # tests __or__
    z = x | y
    # NOTE(review): presumably relies on ys contributing something that xs
    # alone does not; if ys adds nothing new this may not hold -- confirm.
    assert(x != z)

    # tests __ior__
    x |= y
    assert(x == z)
20 | 


--------------------------------------------------------------------------------
/tools/git/hook-clang-format:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | set -o nounset
 3 | 
 4 | FILE_EXTS=${FILE_EXTS:-(c|h)}
 5 | 
 6 | get_files() {
 7 |   git diff --cached --name-only --diff-filter=ACMRT | \
 8 |   egrep "\.$FILE_EXTS\$"
 9 | }
10 | 
11 | _IFS=$IFS
12 | IFS=$'\n'
13 | FILES=($(get_files))
14 | IFS=$_IFS
15 | 
16 | for ((i = 0; i < ${#FILES[@]}; ++i)); do
17 |   file=${FILES[$i]}
18 |   clang-format "$file" | cmp -s "$file" -
19 |   if [ $? -ne 0 ]; then
20 |     echo "File does not respect formatting style: $file" 1>&2
21 |     exit 1
22 |   fi
23 | done
24 | 


--------------------------------------------------------------------------------
/include/twiddle/utils/projection.h:
--------------------------------------------------------------------------------
 1 | #ifndef TWIDDLE_UTILS_PROJECTION_H
 2 | #define TWIDDLE_UTILS_PROJECTION_H
 3 | 
 4 | inline uint32_t tw_projection_mod_32(uint32_t a, uint32_t b) { return a % b; }
 5 | 
 6 | inline uint64_t tw_projection_mod_64(uint64_t a, uint64_t b) { return a % b; }
 7 | 
 8 | inline uint32_t tw_projection_mul_32(uint32_t a, uint32_t b)
 9 | {
10 |   return ((uint64_t)a * (uint64_t)b) >> 32;
11 | }
12 | 
13 | inline uint64_t tw_projection_mul_64(uint64_t a, uint64_t b)
14 | {
15 |   return ((__uint128_t)a * (__uint128_t)b) >> 64;
16 | }
17 | 
18 | #endif /* TWIDDLE_UTILS_PROJECTION_H */
19 | 


--------------------------------------------------------------------------------
/python/tests/test_hyperloglog.py:
--------------------------------------------------------------------------------
 1 | from math import log
 2 | from hypothesis import given
 3 | from test_helpers import TwiddleTest, single_set, double_set
 4 | from twiddle import HyperLogLog
 5 | from unittest import TestCase
 6 | 
class TestHyperLogLog(TestCase):
  # Property-based check of the HyperLogLog union operators (| and |=).
  # NOTE(review): the method name says "bloomfilter" but the body exercises
  # HyperLogLog -- looks like a copy-paste leftover; confirm before renaming.
  @given(double_set)
  def test_bloomfilter_union(self, n_xs_ys):
    # n_xs_ys is the (n, xs, ys) triple generated by test_helpers.double_set.
    n, xs, ys = n_xs_ys
    # The first constructor argument is derived as floor(log2(n)).
    # NOTE(review): presumably this is the HyperLogLog precision -- confirm.
    n = int(log(n, 2))
    x, y = HyperLogLog.from_iterable(n, xs), HyperLogLog.from_iterable(n, ys)

    # tests __or__
    z = x | y
    # NOTE(review): presumably relies on ys contributing something that xs
    # alone does not; if ys adds nothing new this may not hold -- confirm.
    assert(x != z)

    # tests __ior__
    x |= y
    assert(x == z)
21 | 


--------------------------------------------------------------------------------
/tests/examples/example-hyperloglog.c:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <stdio.h>
 3 | 
 4 | #include <twiddle/hyperloglog/hyperloglog.h>
 5 | 
 6 | int main()
 7 | {
 8 |   const uint8_t precision = 16;
 9 |   struct tw_hyperloglog *hll = tw_hyperloglog_new(precision);
10 |   assert(hll);
11 | 
12 |   const uint32_t n_elems = 10 * (1 << precision);
13 |   for (size_t i = 0; i < n_elems; ++i) {
14 |     tw_hyperloglog_add(hll, (void *)&i, sizeof(i));
15 |   }
16 | 
17 |   printf("estimated count: %f, real count: %d\n", tw_hyperloglog_count(hll),
18 |          n_elems);
19 | 
20 |   tw_hyperloglog_free(hll);
21 | 
22 |   return 0;
23 | }
24 | 


--------------------------------------------------------------------------------
/docs/tw_bitmap.2.md:
--------------------------------------------------------------------------------
 1 | % tw_bitmap(2)
 2 | % François Saint-Jacques
 3 | % 2015-09-03
 4 | 
 5 | # NAME
 6 | 
 7 | tw_bitmap – libtwiddle basic bitmap implementation
 8 | 
 9 | # SYNOPSIS
10 | 
11 | ```c
12 | #include <twiddle/bitmap/bitmap.h>
13 | 
14 | struct tw_bitmap * tw_bitmap_new(uint32_t nbits);
15 | void tw_bitmap_free(struct tw_bitmap *bitmap);
16 | 
17 | void tw_bitmap_set(struct tw_bitmap *bitmap, uint32_t pos);
18 | void tw_bitmap_clear(struct tw_bitmap *bitmap, uint32_t pos);
19 | 
20 | bool tw_bitmap_test(struct tw_bitmap *bitmap, uint32_t pos);
21 | ```
22 | 
23 | # DESCRIPTION
24 | 
25 | **tw_bitmap** is a simple struct
26 | 
27 | # RETURN VALUE
28 | 
29 | # SEE ALSO
30 | 


--------------------------------------------------------------------------------
/tools/cram/ccram:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | if [ "$1" = "--python" ]; then
 4 |     shift
 5 |     PYTHON="$1"
 6 |     shift
 7 | else
 8 |     PYTHON=python
 9 | fi
10 | 
11 | if [ "$1" = "--root" ]; then
12 |     shift
13 |     ROOT="$1"
14 |     shift
15 | else
16 |     ROOT=$(dirname $PWD)
17 | fi
18 | 
19 | if [ "$1" = "--build" ]; then
20 |     shift
21 |     BUILD="$1"
22 |     shift
23 | else
24 |     BUILD=$(dirname $PWD)
25 | fi
26 | 
27 | if [ "$1" = "--tests" ]; then
28 |     shift
29 |     TESTS="$1"
30 |     shift
31 | else
32 |     TESTS=../tests
33 | fi
34 | 
35 | export ROOT
36 | 
37 | LD_LIBRARY_PATH="$PWD/src:$LD_LIBRARY_PATH" \
38 | PATH="$BUILD:$PATH" \
39 |   "$PYTHON" "$ROOT/../../tools/cram/cram.py" "$@" "$TESTS"
40 | 


--------------------------------------------------------------------------------
/tests/examples/example-bloomfilter.c:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <string.h>
 3 | 
 4 | #include <twiddle/bloomfilter/bloomfilter.h>
 5 | 
 6 | int main()
 7 | {
 8 |   const uint64_t nbits = 1024;
 9 |   const uint16_t k = 7;
10 |   struct tw_bloomfilter *bf = tw_bloomfilter_new(nbits, k);
11 |   assert(bf);
12 | 
13 |   const char *values[] = {"herp", "derp", "ferp", "merp"};
14 | 
15 |   for (size_t i = 0; i < ((sizeof(values) / sizeof(values[0]))); ++i) {
16 |     tw_bloomfilter_set(bf, values[i], strlen(values[i]));
17 |     assert(tw_bloomfilter_test(bf, values[i], strlen(values[i])));
18 |   }
19 | 
20 |   assert(!tw_bloomfilter_test(bf, "nope", sizeof("nope")));
21 | 
22 |   tw_bloomfilter_free(bf);
23 | 
24 |   return 0;
25 | }
26 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (C) 2015-2016 François Saint-Jacques <fsaintjacques@gmail.com>
 2 | 
 3 | This program is free software: you can redistribute it and/or modify
 4 | it under the terms of the GNU Lesser General Public License as published by
 5 | the Free Software Foundation, either version 3 of the License, or
 6 | (at your option) any later version.
 7 | 
 8 | This program is distributed in the hope that it will be useful,
 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of
10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 | GNU Lesser General Public License for more details.
12 | 
13 | You should have received a copy of the GNU Lesser General Public License
14 | along with this program.  If not, see <http://www.gnu.org/licenses/>.
15 | 


--------------------------------------------------------------------------------
/docs/tw_bitmap.2:
--------------------------------------------------------------------------------
 1 | .TH "tw_bitmap" "2" "2015-09-03" "libtwiddle" "libtwiddle\ documentation"
 2 | .SH NAME
 3 | .PP
 4 | tw_bitmap \[en] libtwiddle basic bitmap implementation
 5 | .SH SYNOPSIS
 6 | .IP
 7 | .nf
 8 | \f[C]
 9 | #include\ <twiddle/bitmap/bitmap.h>
10 | 
11 | struct\ tw_bitmap\ *\ tw_bitmap_new(uint32_t\ nbits);
12 | void\ tw_bitmap_free(struct\ tw_bitmap\ *bitmap);
13 | 
14 | void\ tw_bitmap_set(struct\ tw_bitmap\ *bitmap,\ uint32_t\ pos);
15 | void\ tw_bitmap_clear(struct\ tw_bitmap\ *bitmap,\ uint32_t\ pos);
16 | 
17 | bool\ tw_bitmap_test(struct\ tw_bitmap\ *bitmap,\ uint32_t\ pos);
18 | \f[]
19 | .fi
20 | .SH DESCRIPTION
21 | .PP
22 | \f[B]tw_bitmap\f[] is a simple struct
23 | .SH RETURN VALUE
24 | .SH SEE ALSO
25 | .SH AUTHORS
26 | François Saint\-Jacques.
27 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 2.6)
 2 | set(PROJECT_NAME libtwiddle)
 3 | set(RELEASE_DATE 2016-05-07)
 4 | project(${PROJECT_NAME})
 5 | include(CTest)
 6 | 
 7 | option(USE_AVX "Enable AVX instructions" ON)
 8 | option(USE_AVX2 "Enable AVX2 instructions" OFF)
 9 | option(USE_AVX512 "Enable AVX512 instructions" OFF)
10 | option(USE_LTO "Enable LTO compilation optimization" ON)
11 | option(USE_STATIC_PIC "Enable -fPIC on static library" OFF)
12 | 
13 | set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/tools/cmake")
14 | find_package(ParseArguments)
15 | find_package(Prereqs)
16 | find_package(CTargets)
17 | find_package(Version)
18 | find_package(Options)
19 | find_package(LTO)
20 | 
21 | add_subdirectory(docs)
22 | add_subdirectory(include)
23 | add_subdirectory(src)
24 | add_subdirectory(tests)
25 | 


--------------------------------------------------------------------------------
/include/twiddle/utils/hash.h:
--------------------------------------------------------------------------------
#ifndef TWIDDLE_UTILS_HASH_H
#define TWIDDLE_UTILS_HASH_H

#include <stddef.h>
#include <stdint.h>

/**
 * 128-bit value stored as two 64-bit fields.
 * NOTE(review): `h`/`l` are presumably the high and low halves -- confirm.
 */
typedef struct {
  uint64_t h;
  uint64_t l;
} tw_uint128_t;

/** Reduce a 128-bit value to a 64-bit hash. */
uint64_t tw_hash_128_64(tw_uint128_t hash);

/*
 * Seeded hash functions over an arbitrary byte buffer. Each takes a seed, a
 * pointer to the key and the key length in bytes, and returns either a 64-bit
 * or a 128-bit result. The implementations are not part of this header.
 */

uint64_t tw_metrohash_64(const uint64_t seed, const void *key,
                         const size_t key_len);

tw_uint128_t tw_metrohash_128(const uint64_t seed, const void *key,
                              const size_t key_len);

uint64_t tw_murmur3_64(const uint64_t seed, const void *key,
                       const size_t key_len);

tw_uint128_t tw_murmur3_128(const uint64_t seed, const void *key,
                            const size_t key_len);

#endif /* TWIDDLE_UTILS_HASH_H */
27 | 


--------------------------------------------------------------------------------
/tests/examples/example-minhash.c:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <stdio.h>
 3 | 
 4 | #include <twiddle/hash/minhash.h>
 5 | 
 6 | int main()
 7 | {
 8 |   const uint32_t n_registers = 1 << 13;
 9 |   struct tw_minhash *a = tw_minhash_new(n_registers);
10 |   assert(a);
11 |   struct tw_minhash *b = tw_minhash_clone(a);
12 |   assert(b);
13 | 
14 |   const uint32_t n_elems = 10 * n_registers;
15 |   for (size_t i = 0; i < n_elems; ++i) {
16 |     const size_t key_size = sizeof(i);
17 |     const void *key = (void *)&i;
18 |     if (i % 3 == 0) {
19 |       tw_minhash_add(a, key, key_size);
20 |     }
21 | 
22 |     if (i % 5 == 0) {
23 |       tw_minhash_add(b, key, key_size);
24 |     }
25 |   }
26 | 
27 |   printf("estimated jaccard: %f\n", tw_minhash_estimate(a, b));
28 | 
29 |   tw_minhash_free(b);
30 |   tw_minhash_free(a);
31 | 
32 |   return 0;
33 | }
34 | 


--------------------------------------------------------------------------------
/python/tests/test_helpers.py:
--------------------------------------------------------------------------------
 1 | from math import log
 2 | from hypothesis.strategies import integers, sets, just
 3 | from hypothesis.testrunners.forking import ForkingTestCase
 4 | 
 5 | 
# Strategy yielding an (n, xs) pair: n is drawn from [2**8, 2**16] and xs is a
# non-empty set of integers in [0, n-1] holding at most n elements.
single_set = integers(min_value=2**8, max_value=2**16).flatmap(lambda n:
               (just(n), sets(integers(min_value=0, max_value=n-1),
                         min_size=1, max_size=n,
                         average_size=n/log(n, 2))))


# Same as single_set but yields an (n, xs, ys) triple with two independently
# generated sets over the same [0, n-1] range.
double_set = integers(min_value=2**8, max_value=2**16).flatmap(lambda n:
               (just(n), sets(integers(min_value=0, max_value=n-1),
                              min_size=1, max_size=n,
                              average_size=n/log(n, 2)),
                         sets(integers(min_value=0, max_value=n-1),
                              min_size=1, max_size=n,
                              average_size=n/log(n, 2))))


# Common base class for the twiddle test cases.
# NOTE(review): ForkingTestCase presumably runs each test in a forked
# process -- confirm against the pinned hypothesis version.
class TwiddleTest(ForkingTestCase):
  pass
23 | 


--------------------------------------------------------------------------------
/python/setup.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from setuptools import setup, find_packages
 3 | from setuptools.command.test import test as TestCommand
 4 | 
class PyTest(TestCommand):
    '''Define how to use pytest to test the code'''

    def finalize_options(self):
        # Let the base command finish, then clear the fields setuptools would
        # otherwise use to select tests; run_tests() below invokes pytest.
        TestCommand.finalize_options(self)
        self.test_args = []
        self.test_suite = True

    def run_tests(self):
        #import here, cause outside the eggs aren't loaded
        import pytest
        # Run everything under tests/ and exit with pytest's return code.
        sys.exit(pytest.main(['tests']))

# NOTE(review): py_modules names a top-level `twiddle` module alongside the
# packages discovered by find_packages() -- confirm both are intended.
setup(
    name="twiddle",
    description="python bindings to libtwiddle",
    version="0.0.0",
    author="Francois Saint-Jacques",
    author_email="fsaintjacques@gmail.com",
    url="https://github.com/fsaintjacques/libtwiddle",
    py_modules=['twiddle'],
    license="LGPL License, version 3.0",
    packages=find_packages(exclude=['tests']),
    tests_require=['hypothesis==1.19.0', 'pytest'],
    cmdclass={'test': PyTest}
    )
31 | 


--------------------------------------------------------------------------------
/tests/examples/example-bitmap-rle.c:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <twiddle/bitmap/bitmap_rle.h>
 3 | 
/* Run-length-encoded bitmap example: fill a large range, then query it. */
int main()
{
  /** allocate a bitmap containing 2^31 (about 2 billion) bits */
  const uint64_t nbits = 1UL << 31;
  struct tw_bitmap_rle *bitmap = tw_bitmap_rle_new(nbits);

  assert(bitmap);

  /** fill the first 2^30 (about 1 billion) bits */
  const uint64_t start = 0UL;
  const uint64_t end = 1UL << 30;
  tw_bitmap_rle_set_range(bitmap, start, end);

  /**
   * bitmap_rle DOES NOT support setting bits in non-sorted order, e.g.
   * calling the following will trigger an assert() failure (or undefined
   * behaviour if compiled with NDEBUG):
   *
   * tw_bitmap_rle_set(bitmap, start - 1);
   */

  /* As the checks below show, the range is inclusive of `end`. */
  assert(tw_bitmap_rle_test(bitmap, start));
  assert(tw_bitmap_rle_test(bitmap, end));
  assert(tw_bitmap_rle_find_first_bit(bitmap) == (int64_t)start);
  assert(tw_bitmap_rle_find_first_zero(bitmap) == (int64_t)end + 1);

  tw_bitmap_rle_free(bitmap);

  return 0;
}
34 | 


--------------------------------------------------------------------------------
/tests/examples/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | add_c_test(example-bitmap)
 2 | add_c_test(example-bitmap-rle)
 3 | add_c_test(example-bloomfilter)
 4 | add_c_test(example-bloomfilter-a2)
 5 | add_c_test(example-hyperloglog)
 6 | add_c_test(example-minhash)
 7 | 
 8 | add_c_example(bf-uniq)
 9 | add_c_example(hll-wc)
10 | 
11 | find_package(PythonInterp)
12 | 
13 | if (PYTHON_EXECUTABLE)
14 |     file(GLOB_RECURSE TESTS "${CMAKE_CURRENT_SOURCE_DIR}/*.t")
15 |     foreach(TEST ${TESTS})
16 |         get_filename_component(TEST_NAME "${TEST}" NAME_WE)
17 |         add_test(
18 |             ${TEST_NAME}
19 |             ${CMAKE_COMMAND} -E chdir ${CMAKE_BINARY_DIR}
20 |             ${CMAKE_BINARY_DIR}/../tools/cram/ccram
21 |                 --python ${PYTHON_EXECUTABLE}
22 |                 --root  ${CMAKE_CURRENT_SOURCE_DIR}
23 |                 --build ${CMAKE_BINARY_DIR}/tests/examples
24 |                 --tests ${TEST}
25 |         )
26 |     endforeach(TEST)
27 | else (PYTHON_EXECUTABLE)
28 |     message(WARNING "Unable to find Python; skipping cram tests.")
29 | endif (PYTHON_EXECUTABLE)
30 | 


--------------------------------------------------------------------------------
/src/twiddle/utils/internal.h:
--------------------------------------------------------------------------------
 1 | #ifndef TWIDDLE_HASH_INTERNAL_H
 2 | #define TWIDDLE_HASH_INTERNAL_H
 3 | 
 4 | #define BIG_CONSTANT(x) (x##LLU)
 5 | 
 6 | static inline uint32_t rotl32(uint32_t x, int8_t r)
 7 | {
 8 |   return (x << r) | (x >> (32 - r));
 9 | }
10 | 
11 | static inline uint32_t rotr32(uint32_t x, int8_t r)
12 | {
13 |   return (x >> r) | (x << (32 - r));
14 | }
15 | 
16 | static inline uint64_t rotl64(uint64_t x, int8_t r)
17 | {
18 |   return (x << r) | (x >> (64 - r));
19 | }
20 | 
21 | static inline uint64_t rotr64(uint64_t x, int8_t r)
22 | {
23 |   return (x >> r) | (x << (64 - r));
24 | }
25 | 
26 | static inline uint64_t cread_u64(const void *const ptr)
27 | {
28 |   return *(uint64_t *)ptr;
29 | }
30 | 
31 | static inline uint64_t cread_u32(const void *const ptr)
32 | {
33 |   return *(uint32_t *)ptr;
34 | }
35 | 
36 | static inline uint64_t cread_u16(const void *const ptr)
37 | {
38 |   return *(uint16_t *)ptr;
39 | }
40 | 
41 | static inline uint64_t cread_u8(const void *const ptr)
42 | {
43 |   return *(uint8_t *)ptr;
44 | }
45 | 
46 | #endif /* TWIDDLE_HASH_INTERNAL_H */
47 | 


--------------------------------------------------------------------------------
/tests/examples/example-bloomfilter-a2.c:
--------------------------------------------------------------------------------
 1 | #include <assert.h>
 2 | #include <string.h>
 3 | 
 4 | #include <twiddle/bloomfilter/bloomfilter_a2.h>
 5 | 
 6 | int main()
 7 | {
 8 |   const uint64_t nbits = 1024;
 9 |   const uint16_t k = 7;
10 |   const float density = 0.50;
11 |   struct tw_bloomfilter_a2 *bf = tw_bloomfilter_a2_new(nbits, k, density);
12 |   assert(bf);
13 | 
14 |   /**
15 |    * An active-active -bloomfilter (a2-bloomfilter) is a pair of bloomfilters,
16 |    * one active, one passive that gets rotated when
17 |    * active.density >= density_thresold is reached. Before rotation the passive
18 |    * bloomfilter gets cleared.
19 |    *
20 |    * Since density is applied localy to the active bloom filter, the whole
21 |    * bloomfilter density is maximised by 2 * density.
22 |    *
23 |    * Thus an a2-bloomfilter a bloomfilter with a LRU (on write operation)
24 |    * eviction policy.
25 |    */
26 | 
27 |   for (size_t i = 0; i < nbits * 10; ++i) {
28 |     tw_bloomfilter_a2_set(bf, (void *)&i, sizeof(i));
29 |     assert(tw_bloomfilter_a2_density(bf) < 2 * density);
30 |   }
31 | 
32 |   tw_bloomfilter_a2_free(bf);
33 | 
34 |   return 0;
35 | }
36 | 


--------------------------------------------------------------------------------
/python/tests/test_bloomfilter.py:
--------------------------------------------------------------------------------
 1 | from hypothesis import given
 2 | from test_helpers import TwiddleTest, single_set, double_set
 3 | from twiddle import BloomFilter
 4 | 
 5 | class TestBloomFilter(TwiddleTest):
 6 |   @given(single_set)
 7 |   def test_bloomfilter_negation(self, n_xs):
 8 |     n, xs = n_xs
 9 |     x = BloomFilter.from_iterable(n, 8, xs)
10 |     y = -x
11 |     assert(x == -y)
12 | 
13 | 
14 |   @given(double_set)
15 |   def test_bloomfilter_union(self, n_xs_ys):
16 |     n, xs, ys = n_xs_ys
17 |     x, y = BloomFilter.from_iterable(n, 8, xs), BloomFilter.from_iterable(n, 8, ys)
18 | 
19 |     # tests __or__
20 |     z = x | y
21 |     assert(z == BloomFilter.from_iterable(n, 8, xs | ys))
22 | 
23 |     # tests __ior__
24 |     x |= y
25 |     assert(x == z)
26 | 
27 | 
28 |   @given(double_set)
29 |   def test_bloomfilter_intersection(self, n_xs_ys):
30 |     n, xs, ys = n_xs_ys
31 |     x, y = BloomFilter.from_iterable(n, 8, xs), BloomFilter.from_iterable(n, 8, ys)
32 |     zs = xs & ys
33 | 
34 |     # tests __and__
35 |     z = x & y
36 |     for e in zs:
37 |       assert(e in z)
38 | 
39 |     # tests __iand__
40 |     x &= y
41 |     for e in zs:
42 |       assert(e in x)
43 | 


--------------------------------------------------------------------------------
/tests/check/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | #
 2 | # Check: a unit test framework for C
 3 | # Copyright (C) 2011 Mateusz Loskot
 4 | # Copyright (C) 2001, 2002 Arien Malec
 5 | #
 6 | # This library is free software; you can redistribute it and/or
 7 | # modify it under the terms of the GNU Lesser General Public
 8 | # License as published by the Free Software Foundation; either
 9 | # version 2.1 of the License, or (at your option) any later version.
10 | #
11 | # This library is distributed in the hope that it will be useful,
12 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 | # Lesser General Public License for more details.
15 | #
16 | # You should have received a copy of the GNU Lesser General Public
17 | # License along with this library; if not, write to the
18 | # Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 | # Boston, MA 02111-1307, USA.
20 | #
21 | 
# Build the bundled Check unit-test framework as the `check` test library.
# add_c_test_library() is a project helper defined outside this file; the
# sources are listed inline since they are only consumed here.
add_c_test_library(
  check
  OUTPUT_NAME check
  SOURCES
    check.c
    check_error.c
    check_list.c
    check_log.c
    check_msg.c
    check_pack.c
    check_print.c
    check_run.c
    check_str.c
)
37 | 


--------------------------------------------------------------------------------
/tests/check/check_error.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Check: a unit test framework for C
 3 |  * Copyright (C) 2001, 2002 Arien Malec
 4 |  *
 5 |  * This library is free software; you can redistribute it and/or
 6 |  * modify it under the terms of the GNU Lesser General Public
 7 |  * License as published by the Free Software Foundation; either
 8 |  * version 2.1 of the License, or (at your option) any later version.
 9 |  *
10 |  * This library is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 |  * Lesser General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU Lesser General Public
16 |  * License along with this library; if not, write to the
17 |  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 |  * Boston, MA 02111-1307, USA.
19 |  */
20 | 
/* Error-reporting and "allocate or die" helpers used by the Check sources. */
#ifndef ERROR_H
#define ERROR_H

#include "libcompat.h"
#include <setjmp.h>

/* Jump buffer shared with the error helpers; it is only declared here.
   NOTE(review): presumably the longjmp target used when an error is raised;
   confirm against check_error.c. */
extern jmp_buf error_jmp_buffer;

/* Include stdlib.h beforehand: this header uses size_t but does not include
   a standard header that declares it. */

/* Print error message and die
   If fmt ends in colon, include system error information.
   file and line identify the call site; the variadic arguments follow
   them (presumably consumed by fmt, printf-style). */
void eprintf(const char *fmt, const char *file, int line,
             ...) CK_ATTRIBUTE_NORETURN;
/* malloc or die */
void *emalloc(size_t n);
/* NOTE(review): presumably the realloc counterpart of emalloc (realloc or
   die); confirm against check_error.c. */
void *erealloc(void *, size_t n);

#endif /*ERROR_H */
40 | 


--------------------------------------------------------------------------------
/tests/check/check_print.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Check: a unit test framework for C
 3 |  * Copyright (C) 2001, 2002 Arien Malec
 4 |  *
 5 |  * This library is free software; you can redistribute it and/or
 6 |  * modify it under the terms of the GNU Lesser General Public
 7 |  * License as published by the Free Software Foundation; either
 8 |  * version 2.1 of the License, or (at your option) any later version.
 9 |  *
10 |  * This library is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 |  * Lesser General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU Lesser General Public
16 |  * License along with this library; if not, write to the
17 |  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 |  * Boston, MA 02111-1307, USA.
19 |  */
20 | 
/* Printing helpers for test results.
   This header includes nothing itself: FILE, TestResult, SRunner and
   enum print_output must already be declared by the including file. */
#ifndef CHECK_PRINT_H
#define CHECK_PRINT_H

/* escape XML special characters (" ' < > &) in str and print to file */
void fprint_xml_esc(FILE * file, const char *str);
/* NOTE(review): presumably prints one TestResult to file according to
   print_mode; confirm against check_print.c. */
void tr_fprint(FILE * file, TestResult * tr, enum print_output print_mode);
/* NOTE(review): presumably the XML flavour of tr_fprint; confirm. */
void tr_xmlprint(FILE * file, TestResult * tr, enum print_output print_mode);
/* NOTE(review): presumably prints a whole suite runner's results to file
   according to print_mode; confirm. */
void srunner_fprint(FILE * file, SRunner * sr, enum print_output print_mode);
/* NOTE(review): presumably derives the print mode from the process
   environment; the variable consulted is not visible here. */
enum print_output get_env_printmode(void);


#endif /* CHECK_PRINT_H */
33 | 


--------------------------------------------------------------------------------
/tests/check/check_msg.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Check: a unit test framework for C
 3 |  * Copyright (C) 2001, 2002 Arien Malec
 4 |  *
 5 |  * This library is free software; you can redistribute it and/or
 6 |  * modify it under the terms of the GNU Lesser General Public
 7 |  * License as published by the Free Software Foundation; either
 8 |  * version 2.1 of the License, or (at your option) any later version.
 9 |  *
10 |  * This library is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 |  * Lesser General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU Lesser General Public
16 |  * License along with this library; if not, write to the
17 |  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 |  * Boston, MA 02111-1307, USA.
19 |  */
20 | 
/* Messaging API used while tests run.
   This header includes nothing itself: FILE, TestResult and
   enum ck_result_ctx must already be declared by the including file. */
#ifndef CHECK_MSG_NEW_H
#define CHECK_MSG_NEW_H


/* Functions implementing messaging during test runs */

/* Senders: each one reports a single piece of information about the running
   test (failure message, source location, result context, duration).
   NOTE(review): the unit of `duration` is not visible here; confirm. */
void send_failure_info(const char *msg);
void send_loc_info(const char *file, int line);
void send_ctx_info(enum ck_result_ctx ctx);
void send_duration_info(int duration);

/* NOTE(review): presumably assembles the TestResult from the information
   sent above; the meaning of `waserror` and the ownership of the returned
   pointer should be confirmed against check_msg.c. */
TestResult *receive_test_result(int waserror);

/* Set up / tear down the messaging machinery around test runs. */
void setup_messaging(void);
void teardown_messaging(void);

/* NOTE(review): presumably opens a temporary file and reports its name
   through *name; confirm who owns the returned name. */
FILE *open_tmp_file(char **name);

#endif /*CHECK_MSG_NEW_H */
40 | 


--------------------------------------------------------------------------------
/tests/benchmarks/bench-minhash.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | 
 3 | #include <twiddle/hash/minhash.h>
 4 | 
 5 | #include "benchmark.h"
 6 | 
 7 | void minhash_setup(struct benchmark *b)
 8 | {
 9 | 
10 |   b->opaque = (void *)tw_minhash_new(b->size);
11 | }
12 | 
13 | void minhash_teardown(struct benchmark *b)
14 | {
15 |   tw_minhash_free(b->opaque);
16 |   b->opaque = NULL;
17 | }
18 | 
/* Benchmark body: add the raw bytes of the counters 0..9999 to the minhash
 * passed through `opaque`. */
void minhash_add(void *opaque)
{
  struct tw_minhash *sketch = (struct tw_minhash *)opaque;
  size_t key = 0;

  while (key < 10000) {
    tw_minhash_add(sketch, &key, sizeof(key));
    key++;
  }
}
26 | 
/* Benchmark body: estimate the minhash against itself 10000 times; the
 * estimate itself is discarded. */
void minhash_est(void *opaque)
{
  struct tw_minhash *sketch = (struct tw_minhash *)opaque;
  size_t remaining = 10000;

  while (remaining > 0) {
    tw_minhash_estimate(sketch, sketch);
    --remaining;
  }
}
34 | 
35 | int main(int argc, char *argv[])
36 | {
37 | 
38 |   if (argc != 3) {
39 |     fprintf(stderr, "usage: %s <repeat> <size>\n", argv[0]);
40 |     return EXIT_FAILURE;
41 |   }
42 | 
43 |   const size_t repeat = strtol(argv[1], NULL, 10);
44 |   const size_t size = strtol(argv[2], NULL, 10);
45 | 
46 |   struct benchmark benchmarks[] = {
47 |       BENCHMARK_FIXTURE(minhash_add, repeat, size, minhash_setup,
48 |                         minhash_teardown),
49 |       BENCHMARK_FIXTURE(minhash_est, repeat, size, minhash_setup,
50 |                         minhash_teardown)};
51 | 
52 |   run_benchmarks(benchmarks, sizeof(benchmarks) / sizeof(benchmarks[0]));
53 | 
54 |   return EXIT_SUCCESS;
55 | }
56 | 


--------------------------------------------------------------------------------
/tools/cmake/FindVersion.cmake:
--------------------------------------------------------------------------------
 1 | #-----------------------------------------------------------------------
 2 | # Retrieve the current version number
 3 | 
 4 | execute_process(
 5 |     COMMAND git describe
 6 |     WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
 7 |     RESULT_VARIABLE VERSION_RESULT
 8 |     OUTPUT_VARIABLE VERSION
 9 |     OUTPUT_STRIP_TRAILING_WHITESPACE
10 | )
11 | if(VERSION_RESULT)
12 |     set(VERSION "0.0.0")
13 |     message(STATUS "Cannot determine version number reverting to: " ${VERSION})
14 | endif(VERSION_RESULT)
15 | message(STATUS "Current version: " ${VERSION})
16 | 
17 | if(VERSION MATCHES "^([0-9]+)\\.([0-9]+)\\.([0-9]+)(-.*)?$")
18 |     set(VERSION_MAJOR "${CMAKE_MATCH_1}")
19 |     set(VERSION_MINOR "${CMAKE_MATCH_2}")
20 |     set(VERSION_PATCH "${CMAKE_MATCH_3}")
21 |     set(VERSION_COMMIT "${CMAKE_MATCH_4}")
22 | else(VERSION MATCHES "^([0-9]+)\\.([0-9]+)\\.([0-9]+)(-.*)?$")
23 |     message(FATAL_ERROR "Invalid version number: ${VERSION}")
24 | endif(VERSION MATCHES "^([0-9]+)\\.([0-9]+)\\.([0-9]+)(-.*)?$")
25 | 
26 | execute_process(
27 |     COMMAND git rev-parse HEAD
28 |     WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
29 |     RESULT_VARIABLE GIT_SHA1_RESULT
30 |     OUTPUT_VARIABLE GIT_SHA1
31 |     OUTPUT_STRIP_TRAILING_WHITESPACE
32 | )
33 | if(GIT_SHA1_RESULT)
34 |     message(FATAL_ERROR
35 |             "Cannot determine git commit: " ${GIT_SHA1_RESULT})
36 | endif(GIT_SHA1_RESULT)
37 | message(STATUS "Current revision: " ${GIT_SHA1})
38 | 


--------------------------------------------------------------------------------
/tools/cmake/FindLTO.cmake:
--------------------------------------------------------------------------------
 1 | # Credits goes to nkurz see
 2 | # https://github.com/RoaringBitmap/CRoaring/blob/master/tools/cmake/FindLTO.cmake
 3 | 
 4 | macro(append var string)
 5 |   set(${var} "${${var}} ${string}")
 6 | endmacro(append)
 7 | 
 8 | if(USE_LTO)
 9 |     if ("${CMAKE_C_COMPILER_ID}" MATCHES "Intel")
10 |         append(CMAKE_C_FLAGS "-ipo")
11 |         append(CMAKE_CXX_FLAGS "-ipo")
12 |     elseif ("${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
13 |         append(CMAKE_C_FLAGS "-flto")
14 |         append(CMAKE_CXX_FLAGS "-flto")
15 |         append(CMAKE_EXE_LINKER_FLAGS "-fuse-ld=gold -flto")
16 |         append(CMAKE_SHARED_LINKER_FLAGS "-fuse-ld=gold -flto")
17 |         if(NOT ("${CMAKE_AR}" MATCHES "llvm-ar-*"))
18 |             message(FATAL_ERROR
19 |               "You requested clang compiler & LTO without providing CMAKE_AR. "
20 |               "CMAKE_AR is currently set to ${CMAKE_AR}. "
21 |               "To fix this, invoke cmake with `-DCMAKE_AR=$(which llvm-ar-$version)`.")
22 |         endif()
23 |         if(NOT ("${CMAKE_RANLIB}" MATCHES "llvm-ranlib-*"))
24 |             message(FATAL_ERROR
25 |               "You requested clang compiler & LTO without providing CMAKE_RANLIB. "
26 |               "CMAKE_RANLIB is currently set to ${CMAKE_RANLIB}. "
27 |               "To fix this, invoke cmake `-DCMAKE_RANLIB=$(which llvm-ranlib-$version).`")
28 |         endif()
29 |     else()
30 |         append(CMAKE_C_FLAGS "-flto")
31 |         append(CMAKE_CXX_FLAGS "-flto")
32 |         set(CMAKE_AR "gcc-ar")
33 |         set(CMAKE_RANLIB "gcc-ranlib")
34 |     endif()
35 | endif(USE_LTO)
36 | 


--------------------------------------------------------------------------------
/tools/cmake/FindParseArguments.cmake:
--------------------------------------------------------------------------------
 1 | # CMake 2.8.4 and higher gives us cmake_parse_arguments out of the box.  For
 2 | # earlier versions (RHEL5!) we have to define it ourselves.  (The definition
 3 | # comes from <http://www.cmake.org/Wiki/CMakeMacroParseArguments>.)
 4 | 
# Make cmake_parse_arguments available: define a fallback macro on
# CMake < 2.8.4, otherwise load the module shipped with CMake.
if (CMAKE_VERSION VERSION_LESS "2.8.4")

# Fallback: CMAKE_PARSE_ARGUMENTS(<prefix> <arg_names> <option_names> <args>...)
#   <arg_names>    list of keywords that each collect the values following them
#   <option_names> list of flag keywords, set to TRUE when present
# Results are stored in <prefix>_<keyword>; values that appear before any
# keyword are stored in <prefix>_DEFAULT_ARGS.
#
# NOTE(review): this parameter order differs from the cmake_parse_arguments
# shipped with CMake (<prefix> <options> <one_value_keywords>
# <multi_value_keywords> <args>...), so callers written for the built-in
# would be parsed differently on CMake < 2.8.4 -- verify against the callers.
MACRO(CMAKE_PARSE_ARGUMENTS prefix arg_names option_names)
  # Reset every output variable: keywords to unset, options to FALSE.
  SET(DEFAULT_ARGS)
  FOREACH(arg_name ${arg_names})
    SET(${prefix}_${arg_name})
  ENDFOREACH(arg_name)
  FOREACH(option ${option_names})
    SET(${prefix}_${option} FALSE)
  ENDFOREACH(option)

  # Walk the remaining arguments, accumulating values under the keyword seen
  # most recently (DEFAULT_ARGS until the first keyword).
  SET(current_arg_name DEFAULT_ARGS)
  SET(current_arg_list)
  FOREACH(arg ${ARGN})
    # LIST(FIND) needs a real variable, hence the local copies of the macro
    # arguments.
    SET(larg_names ${arg_names})
    LIST(FIND larg_names "${arg}" is_arg_name)
    IF (is_arg_name GREATER -1)
      # New keyword: flush the values gathered for the previous one.
      SET(${prefix}_${current_arg_name} ${current_arg_list})
      SET(current_arg_name ${arg})
      SET(current_arg_list)
    ELSE (is_arg_name GREATER -1)
      SET(loption_names ${option_names})
      LIST(FIND loption_names "${arg}" is_option)
      IF (is_option GREATER -1)
          SET(${prefix}_${arg} TRUE)
      ELSE (is_option GREATER -1)
          SET(current_arg_list ${current_arg_list} ${arg})
      ENDIF (is_option GREATER -1)
    ENDIF (is_arg_name GREATER -1)
  ENDFOREACH(arg)
  # Flush the values of the last keyword.
  SET(${prefix}_${current_arg_name} ${current_arg_list})
ENDMACRO(CMAKE_PARSE_ARGUMENTS)

else (CMAKE_VERSION VERSION_LESS "2.8.4")

    include(CMakeParseArguments)

endif (CMAKE_VERSION VERSION_LESS "2.8.4")
43 | 


--------------------------------------------------------------------------------
/python/tests/test_bitmap_rle.py:
--------------------------------------------------------------------------------
 1 | from hypothesis import given, example
 2 | from test_helpers import TwiddleTest, single_set, double_set
 3 | from twiddle import BitmapRLE
 4 | 
 5 | class TestBitmapRLE(TwiddleTest):
 6 |   @given(single_set)
 7 |   def test_bitmap_find_first_zero(self, n_xs):
 8 |     n, xs = n_xs
 9 |     x = BitmapRLE.from_indices(n, xs)
10 | 
11 |     expected = -1 if x.full() else min(set(range(0, n)) - xs)
12 |     first = x.find_first_zero()
13 |     assert(first == expected)
14 | 
15 | 
16 |   @given(single_set)
17 |   def test_bitmap_find_first_bit(self, n_xs):
18 |     n, xs = n_xs
19 |     x = BitmapRLE.from_indices(n, xs)
20 | 
21 |     expected = -1 if x.empty() else min(xs)
22 |     first = x.find_first_bit()
23 |     assert(first == expected)
24 | 
25 | 
26 |   @given(single_set)
27 |   def test_bitmap_negation(self, n_xs):
28 |     n, xs = n_xs
29 |     x = BitmapRLE.from_indices(n, xs)
30 | 
31 |     y = -x
32 | 
33 |     for idx in xs:
34 |       assert(idx not in y)
35 | 
36 |     assert(x != y)
37 |     assert(x == -y)
38 | 
39 | 
40 |   @given(double_set)
41 |   def test_bitmap_union(self, n_xs_ys):
42 |     n, xs, ys = n_xs_ys
43 |     x, y = BitmapRLE.from_indices(n, xs), BitmapRLE.from_indices(n, ys)
44 | 
45 |     # tests __or__
46 |     z = x | y
47 |     assert(z == BitmapRLE.from_indices(n, xs | ys))
48 | 
49 |     # tests __ior__
50 |     x |= y
51 |     assert(x == z)
52 | 
53 | 
54 |   @given(double_set)
55 |   def test_bitmap_intersection(self, n_xs_ys):
56 |     n, xs, ys = n_xs_ys
57 |     x, y = BitmapRLE.from_indices(n, xs), BitmapRLE.from_indices(n, ys)
58 | 
59 |     # tests __and__
60 |     z = x & y
61 |     assert(z == BitmapRLE.from_indices(n, xs & ys))
62 | 
63 |     # tests __iand__
64 |     x &= y
65 |     assert(x == z)
66 | 


--------------------------------------------------------------------------------
/tests/check/check_str.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Check: a unit test framework for C
 3 |  * Copyright (C) 2001, 2002 Arien Malec
 4 |  *
 5 |  * This library is free software; you can redistribute it and/or
 6 |  * modify it under the terms of the GNU Lesser General Public
 7 |  * License as published by the Free Software Foundation; either
 8 |  * version 2.1 of the License, or (at your option) any later version.
 9 |  *
10 |  * This library is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 |  * Lesser General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU Lesser General Public
16 |  * License along with this library; if not, write to the
17 |  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 |  * Boston, MA 02111-1307, USA.
19 |  */
20 | 
/* String-formatting helpers for test results.
   This header includes nothing itself: TestResult and SRunner must already
   be declared by the including file. */
#ifndef CHECK_STR_H
#define CHECK_STR_H

/* Return a string representation of the given TestResult.  Return
   value has been malloc'd, and must be freed by the caller */
char *tr_str(TestResult * tr);

/* Return a string representation of the given TestResult message
   without the test id or result type. This is suitable for separate
   formatting of the test and the message. Return value has been
   malloc'd, and must be freed by the caller */
char *tr_short_str(TestResult * tr);

/* Return a string representation of the given SRunner's run
   statistics (% passed, num run, passed, errors, failures). Return
   value has been malloc'd, and must be freed by the caller
*/
char *sr_stat_str(SRunner * sr);

/* NOTE(review): presumably formats fmt and its arguments printf-style into
   a newly allocated string that the caller must free; confirm against
   check_str.c. */
char *ck_strdup_printf(const char *fmt, ...);

#endif /* CHECK_STR_H */
43 | 


--------------------------------------------------------------------------------
/src/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Update the VERSION property below according to the following rules (taken from
 2 | # [1]):
 3 | #
 4 | # VERSION = current.revision.age
 5 | #
 6 | #   1. Start with a VERSION of `0.0.0` for each shared library.
 7 | #   2. Update VERSION only immediately before a public release of your software.
 8 | #      More frequent updates are unnecessary, and only guarantee that the
 9 | #      current interface number gets larger faster.
10 | #   3. If the library source code has changed at all since the last update, then
11 | #      increment `revision` (`c.r.a` becomes `c.r+1.a`).
12 | #   4. If any interfaces have been added, removed, or changed since the last
13 | #      update, increment `current`, and set `revision` to 0.
14 | #   5. If any interfaces have been added since the last public release, then
15 | #      increment `age`.
16 | #   6. If any interfaces have been removed or changed since the last public
17 | #      release, then set `age` to 0.
18 | #
19 | # Note that changing `current` means that you are releasing a new
20 | # backwards-incompatible version of the library.  This has implications on
21 | # packaging, so once an API has stabilized, this should be a rare occurrence.
22 | #
23 | # [1] http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html#Updating-version-info

# Release builds additionally unroll loops.  This edits the Release C flags
# variable, so it affects every target created in this directory scope.
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -funroll-loops")

# The twiddle shared library.  add_c_library() is a project helper defined
# outside this file; VERSION follows the current.revision.age rules described
# at the top of this file.
add_c_library(
    libtwiddle
    OUTPUT_NAME twiddle
    PKGCONFIG_NAME libtwiddle
    VERSION 1.0.0
    SOURCES
        twiddle/bitmap/bitmap.c
        twiddle/bitmap/bitmap_rle.c
        twiddle/bloomfilter/bloomfilter.c
        twiddle/bloomfilter/bloomfilter_a2.c
        twiddle/hyperloglog/hyperloglog.c
        twiddle/hyperloglog/hyperloglog_bias.c
        twiddle/hash/minhash.c
        twiddle/utils/hash.c
        twiddle/utils/murmur3.c
        twiddle/utils/metrohash.c
)
42 | 


--------------------------------------------------------------------------------
/tests/check/check_list.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Check: a unit test framework for C
 3 |  * Copyright (C) 2001, 2002 Arien Malec
 4 |  *
 5 |  * This library is free software; you can redistribute it and/or
 6 |  * modify it under the terms of the GNU Lesser General Public
 7 |  * License as published by the Free Software Foundation; either
 8 |  * version 2.1 of the License, or (at your option) any later version.
 9 |  *
10 |  * This library is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 |  * Lesser General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU Lesser General Public
16 |  * License along with this library; if not, write to the
17 |  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 |  * Boston, MA 02111-1307, USA.
19 |  */
20 | 
/* Minimal cursor-based list of void* values.  The list keeps a "current"
   position that the functions below move and read. */
#ifndef CHECK_LIST_H
#define CHECK_LIST_H

/* Opaque list type; the definition is not part of this header. */
typedef struct List List;

/* Create an empty list */
List *check_list_create(void);

/* Is list at end? */
int check_list_at_end(List * lp);

/* Position list at front */
void check_list_front(List * lp);

/* Add a value to the front of the list,
   positioning newly added value as current value.
   More expensive than check_list_add_end, as it uses memmove. */
void check_list_add_front(List * lp, void *val);

/* Add a value to the end of the list,
   positioning newly added value as current value */
void check_list_add_end(List * lp, void *val);

/* Give the value of the current node */
void *check_list_val(List * lp);

/* Position the list at the next node */
void check_list_advance(List * lp);

/* Free a list, but don't free values */
void check_list_free(List * lp);

/* NOTE(review): presumably calls fp on every value stored in the list;
   confirm against check_list.c. */
void check_list_apply(List * lp, void (*fp) (void *));


#endif /* CHECK_LIST_H */
57 | 


--------------------------------------------------------------------------------
/python/twiddle/minhash.py:
--------------------------------------------------------------------------------
 1 | from c import libtwiddle
 2 | from ctypes import c_long, pointer
 3 | 
 4 | class MinHash(object):
 5 |   def __init__(self, n_registers, ptr=None):
 6 |     self.minhash     = ptr if ptr else libtwiddle.tw_minhash_new(n_registers)
 7 |     self.n_registers = n_registers
 8 | 
 9 | 
10 |   def __del__(self):
11 |     if self.minhash:
12 |       libtwiddle.tw_minhash_free(self.minhash)
13 | 
14 | 
15 |   @classmethod
16 |   def copy(cls, h):
17 |     return cls(h.n_registers, ptr=libtwiddle.tw_minhash_clone(h.minhash))
18 | 
19 | 
20 |   @classmethod
21 |   def from_iterable(cls, n_registers, iterable):
22 |     minhash = MinHash(n_registers)
23 | 
24 |     for i in iterable:
25 |       minhash.add(i)
26 | 
27 |     return minhash
28 | 
29 | 
30 |   def add(self, x):
31 |     h = pointer(c_long(hash(x)))
32 |     libtwiddle.tw_minhash_add(self.minhash, h, 8)
33 | 
34 | 
35 |   def __eq__(self, other):
36 |     if not isinstance(other, MinHash):
37 |       return False
38 | 
39 |     return libtwiddle.tw_minhash_equal(self.minhash, other.minhash)
40 | 
41 | 
42 |   def __op(self, other, func, copy=lambda x: MinHash.copy(x)):
43 |     if not isinstance(other, MinHash):
44 |       raise ValueError("Must compare MinHash to MinHash")
45 | 
46 |     if self.n_registers != other.n_registers:
47 |       raise ValueError("MinHashs must be of equal n_registers to be comparable")
48 | 
49 |     ret = copy(self)
50 | 
51 |     func(other.minhash, ret.minhash)
52 | 
53 |     return ret
54 | 
55 | 
56 |   def __iop(self, other, func):
57 |     return self.__op(other, func, copy=lambda x: x)
58 | 
59 | 
60 |   def __or__(self, other):
61 |     return self.__op(other, libtwiddle.tw_minhash_merge)
62 | 
63 | 
64 |   def __ior__(self, other):
65 |     return self.__iop(other, libtwiddle.tw_minhash_merge)
66 | 
67 | 
68 |   def estimate(self, other):
69 |     if not isinstance(other, MinHash):
70 |       raise ValueError("Must compare MinHash to MinHash")
71 | 
72 |     return libtwiddle.tw_minhash_estimate(self.minhash, other.minhash)
73 | 


--------------------------------------------------------------------------------
/tools/cmake/FindOptions.cmake:
--------------------------------------------------------------------------------
 1 | #-----------------------------------------------------------------------
 2 | # Set some options
 3 | 
 4 | # set C standard
 5 | if (CMAKE_VERSION VERSION_LESS "3.1")
 6 |   if (CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
 7 |     set (CMAKE_C_FLAGS "-std=gnu11 ${CMAKE_C_FLAGS}")
 8 |   endif ()
 9 | else ()
10 |   set (CMAKE_C_STANDARD 11)
11 | endif ()
12 | 
13 | 
14 | set(CMAKE_INSTALL_LIBDIR lib CACHE STRING
15 |       "The base name of the installation directory for libraries")
16 | 
17 | # default to Release type if not provided
18 | # compile to -03 with no assert
19 | if(NOT CMAKE_BUILD_TYPE)
20 |     set(CMAKE_BUILD_TYPE Release CACHE STRING
21 |         "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel."
22 |         FORCE)
23 | endif(NOT CMAKE_BUILD_TYPE)
24 | 
25 | 
26 | if(USE_AVX512)
27 |   if (CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
28 |     add_definitions(-mavx512 -mavx2 -mavx)
29 |   elseif (CMAKE_C_COMPILER_ID MATCHES "Intel")
30 |     add_definitions(-xCORE-AVX512)
31 |   endif ()
32 |   add_definitions(-DUSE_AVX512=1 -DUSE_AVX2=1 -DUSE_AVX=1)
33 | elseif(USE_AVX2)
34 |   if (CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
35 |     add_definitions(-mavx2 -mavx)
36 |   elseif (CMAKE_C_COMPILER_ID MATCHES "Intel")
37 |     add_definitions(-xCORE-AVX2)
38 |   endif ()
39 |   add_definitions(-DUSE_AVX2=1)
40 |   add_definitions(-DUSE_AVX=1)
41 | elseif(USE_AVX)
42 |   if (CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
43 |     add_definitions(-mavx)
44 |   elseif (CMAKE_C_COMPILER_ID MATCHES "Intel")
45 |     add_definitions(-xCORE-AVX-I)
46 |   endif ()
47 |   add_definitions(-DUSE_AVX=1)
48 | else()
49 |   add_definitions(-DUSE_PORTABLE=1)
50 | endif()
51 | 
52 | # strict compile options
53 | add_definitions(-Wall -Werror -pedantic -Wextra -Wshadow -Wfloat-equal -Wundef
54 |                 -Wwrite-strings -Wunreachable-code -Wformat=2 -Winit-self)
55 | 


--------------------------------------------------------------------------------
/python/twiddle/hyperloglog.py:
--------------------------------------------------------------------------------
 1 | from c import libtwiddle
 2 | from ctypes import c_long, pointer
 3 | 
 4 | class HyperLogLog(object):
 5 |   def __init__(self, precision, ptr=None):
 6 |     self.hyperloglog = ptr if ptr else libtwiddle.tw_hyperloglog_new(precision)
 7 |     self.precision   = precision
 8 | 
 9 | 
10 |   def __del__(self):
11 |     if self.hyperloglog:
12 |       libtwiddle.tw_hyperloglog_free(self.hyperloglog)
13 | 
14 | 
15 |   @classmethod
16 |   def copy(cls, h):
17 |     return cls(h.precision, ptr=libtwiddle.tw_hyperloglog_clone(h.hyperloglog))
18 | 
19 | 
20 |   @classmethod
21 |   def from_iterable(cls, precision, iterable):
22 |     hyperloglog = HyperLogLog(precision)
23 | 
24 |     for i in iterable:
25 |       hyperloglog.add(i)
26 | 
27 |     return hyperloglog
28 | 
29 | 
30 |   def __len__(self):
31 |     return self.count()
32 | 
33 | 
34 |   def add(self, x):
35 |     h = pointer(c_long(hash(x)))
36 |     libtwiddle.tw_hyperloglog_add(self.hyperloglog, h, 8)
37 | 
38 | 
39 |   def __eq__(self, other):
40 |     if not isinstance(other, HyperLogLog):
41 |       return False
42 | 
43 |     return libtwiddle.tw_hyperloglog_equal(self.hyperloglog, other.hyperloglog)
44 | 
45 | 
46 |   def __op(self, other, func, copy=lambda x: HyperLogLog.copy(x)):
47 |     if not isinstance(other, HyperLogLog):
48 |       raise ValueError("Must compare HyperLogLog to HyperLogLog")
49 | 
50 |     if self.precision != other.precision:
51 |       raise ValueError("HyperLogLogs must be of equal precision to be comparable")
52 | 
53 |     ret = copy(self)
54 | 
55 |     func(other.hyperloglog, ret.hyperloglog)
56 | 
57 |     return ret
58 | 
59 | 
60 |   def __iop(self, other, func):
61 |     return self.__op(other, func, copy=lambda x: x)
62 | 
63 | 
64 |   def __or__(self, other):
65 |     return self.__op(other, libtwiddle.tw_hyperloglog_merge)
66 | 
67 | 
68 |   def __ior__(self, other):
69 |     return self.__iop(other, libtwiddle.tw_hyperloglog_merge)
70 | 
71 | 
72 |   def count(self):
73 |     return libtwiddle.tw_hyperloglog_count(self.hyperloglog)
74 | 


--------------------------------------------------------------------------------
/python/tests/test_bitmap.py:
--------------------------------------------------------------------------------
 1 | from hypothesis import given
 2 | from test_helpers import TwiddleTest, single_set, double_set
 3 | from twiddle import Bitmap
 4 | 
 5 | class TestBitmap(TwiddleTest):
 6 |   @given(single_set)
 7 |   def test_bitmap_find_first_zero(self, n_xs):
 8 |     n, xs = n_xs
 9 |     x = Bitmap.from_indices(n, xs)
10 | 
11 |     expected = -1 if x.full() else min(set(range(0, n)) - xs)
12 |     first = x.find_first_zero()
13 |     assert(first == expected)
14 | 
15 | 
16 |   @given(single_set)
17 |   def test_bitmap_find_first_bit(self, n_xs):
18 |     n, xs = n_xs
19 |     x = Bitmap.from_indices(n, xs)
20 | 
21 |     expected = -1 if x.empty() else min(xs)
22 |     first = x.find_first_bit()
23 |     assert(first == expected)
24 | 
25 | 
26 |   @given(single_set)
27 |   def test_bitmap_negation(self, n_xs):
28 |     n, xs = n_xs
29 |     x = Bitmap.from_indices(n, xs)
30 | 
31 |     y = -x
32 | 
33 |     for idx in xs:
34 |       assert(idx not in y)
35 | 
36 |     assert(x != y)
37 |     assert(x == -y)
38 | 
39 | 
40 |   @given(double_set)
41 |   def test_bitmap_union(self, n_xs_ys):
42 |     n, xs, ys = n_xs_ys
43 |     x, y = Bitmap.from_indices(n, xs), Bitmap.from_indices(n, ys)
44 | 
45 |     # tests __or__
46 |     z = x | y
47 |     assert(z == Bitmap.from_indices(n, xs | ys))
48 | 
49 |     # tests __ior__
50 |     x |= y
51 |     assert(x == z)
52 | 
53 | 
54 |   @given(double_set)
55 |   def test_bitmap_intersection(self, n_xs_ys):
56 |     n, xs, ys = n_xs_ys
57 |     x, y = Bitmap.from_indices(n, xs), Bitmap.from_indices(n, ys)
58 | 
59 |     # tests __and__
60 |     z = x & y
61 |     assert(z == Bitmap.from_indices(n, xs & ys))
62 | 
63 |     # tests __iand__
64 |     x &= y
65 |     assert(x == z)
66 | 
67 | 
68 |   @given(double_set)
69 |   def test_bitmap_xor(self, n_xs_ys):
70 |     n, xs, ys = n_xs_ys
71 |     x, y = Bitmap.from_indices(n, xs), Bitmap.from_indices(n, ys)
72 | 
73 |     # tests __xor__
74 |     z = x ^ y
75 |     assert(z == Bitmap.from_indices(n, xs ^ ys))
76 | 
77 |     # tests __ixor__
78 |     x ^= y
79 |     assert(x == z)
80 | 


--------------------------------------------------------------------------------
/tests/benchmarks/bench-bloomfilter.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | 
 3 | #include <twiddle/bitmap/bitmap.h>
 4 | #include <twiddle/bloomfilter/bloomfilter.h>
 5 | 
 6 | #include "benchmark.h"
 7 | 
 8 | void bloomfilter_setup(struct benchmark *b)
 9 | {
10 |   /* Fixture setup: build the filter under test and park it in b->opaque. */
11 |   const uint16_t k = 10;
12 |   const size_t size = b->size * 8;
13 |   b->opaque = tw_bloomfilter_new(size, k);
14 |   assert(b->opaque);
15 |   /* Insert every key in [0, size) that is not a multiple of 3. */
16 |   for (size_t key = 0; key < size; ++key) {
17 |     if (key % 3 != 0) {
18 |       tw_bloomfilter_set(b->opaque, &key, sizeof(key));
19 |     }
20 |   }
21 | }
22 | 
23 | void bloomfilter_teardown(struct benchmark *b)
24 | {
25 |   /* Release the fixture's filter and clear the handle stored in b->opaque. */
26 |   tw_bloomfilter_free((struct tw_bloomfilter *)b->opaque);
27 |   b->opaque = NULL;
28 | }
29 | 
30 | void bloomfilter_set(void *opaque)
31 | {
32 |   /* Benchmark body: insert bitmap->size / (8 * 128) consecutive integer keys. */
33 |   struct tw_bloomfilter *filter = (struct tw_bloomfilter *)opaque;
34 |   const size_t limit = filter->bitmap->size / (8 * 128);
35 |   for (size_t key = 0; key < limit; ++key) {
36 |     tw_bloomfilter_set(filter, &key, sizeof(key));
37 |   }
38 | }
39 | 
40 | void bloomfilter_test(void *opaque)
41 | {
42 |   /* Benchmark body: query bitmap->size / (8 * 128) consecutive integer keys. */
43 |   struct tw_bloomfilter *filter = (struct tw_bloomfilter *)opaque;
44 |   const size_t limit = filter->bitmap->size / (8 * 128);
45 |   for (size_t key = 0; key < limit; ++key) {
46 |     tw_bloomfilter_test(filter, &key, sizeof(key));
47 |   }
48 | }
49 | 
50 | int main(int argc, char *argv[])
51 | {
52 |   /* Usage: <prog> <repeat> <size>; both are parsed as base-10 integers. */
53 |   if (argc != 3) {
54 |     fprintf(stderr, "usage: %s <repeat> <size>\n", argv[0]);
55 |     return EXIT_FAILURE;
56 |   }
57 | 
58 |   const size_t repeat = strtol(argv[1], NULL, 10);
59 |   const size_t size = strtol(argv[2], NULL, 10);
60 |   (void)size;
61 | 
62 |   struct benchmark benchmarks[] = {
63 |       BENCHMARK_FIXTURE(bloomfilter_set, repeat, size, bloomfilter_setup,
64 |                         bloomfilter_teardown),
65 |       BENCHMARK_FIXTURE(bloomfilter_test, repeat, size, bloomfilter_setup,
66 |                         bloomfilter_teardown),
67 |   };
68 | 
69 |   run_benchmarks(benchmarks, sizeof(benchmarks) / sizeof(benchmarks[0]));
70 | 
71 |   return EXIT_SUCCESS;
72 | }
73 | 


--------------------------------------------------------------------------------
/tests/benchmarks/bench-bitmap.c:
--------------------------------------------------------------------------------
 1 | #include <stdlib.h>
 2 | 
 3 | #include <twiddle/bitmap/bitmap.h>
 4 | 
 5 | #include "benchmark.h"
 6 | 
 7 | struct dual_bitmap { /* fixture state: the pair of bitmaps under test */
 8 |   struct tw_bitmap *a; /* first argument of tw_bitmap_xor / tw_bitmap_equal */
 9 |   struct tw_bitmap *b; /* second argument of tw_bitmap_xor / tw_bitmap_equal */
10 | };
11 | 
12 | void bitmap_dual_setup(struct benchmark *b)
13 | {
14 |   /* Fixture setup: allocate the bitmap pair and store it in b->opaque. */
15 |   const size_t size = b->size * 8;
16 |   struct dual_bitmap *pair = malloc(sizeof(*pair));
17 |   b->opaque = pair;
18 |   assert(pair);
19 | 
20 |   pair->a = tw_bitmap_new(size);
21 |   assert(pair->a);
22 |   pair->b = tw_bitmap_new(size);
23 |   assert(pair->b);
24 |   /* Set the same indices in both bitmaps: every i not a multiple of 5. */
25 |   for (size_t i = 0; i < size; ++i) {
26 |     if (i % 5 != 0) {
27 |       tw_bitmap_set(pair->a, i);
28 |       tw_bitmap_set(pair->b, i);
29 |     }
30 |   }
31 | }
32 | 
33 | void bitmap_dual_teardown(struct benchmark *b)
34 | {
35 |   struct dual_bitmap *dual = (struct dual_bitmap *)b->opaque;
36 |   tw_bitmap_free(dual->b); /* free both bitmaps before the struct holding them */
37 |   tw_bitmap_free(dual->a);
38 |   free(dual);
39 |   b->opaque = NULL; /* leave no dangling handle behind */
40 | }
41 | 
42 | void bitmap_xor(void *opaque)
43 | {
44 |   /* Benchmark body: one tw_bitmap_xor call over the fixture's two bitmaps. */
45 |   const struct dual_bitmap *pair = (const struct dual_bitmap *)opaque;
46 |   tw_bitmap_xor(pair->a, pair->b);
47 | }
48 | 
49 | void bitmap_equal(void *opaque)
50 | {
51 |   /* Benchmark body: compare the two bitmaps; the answer itself is unused. */
52 |   const struct dual_bitmap *pair = (const struct dual_bitmap *)opaque;
53 | 
54 |   (void)tw_bitmap_equal(pair->a, pair->b);
55 | }
56 | 
57 | int main(int argc, char *argv[])
58 | {
59 |   /* Usage: <prog> <repeat> <size>; both are parsed as base-10 integers. */
60 |   if (argc != 3) {
61 |     fprintf(stderr, "usage: %s <repeat> <size>\n", argv[0]);
62 |     return EXIT_FAILURE;
63 |   }
64 | 
65 |   const size_t repeat = strtol(argv[1], NULL, 10);
66 |   const size_t size = strtol(argv[2], NULL, 10);
67 | 
68 |   struct benchmark benchmarks[] = {
69 |       BENCHMARK_FIXTURE(bitmap_equal, repeat, size, bitmap_dual_setup,
70 |                         bitmap_dual_teardown),
71 |       BENCHMARK_FIXTURE(bitmap_xor, repeat, size, bitmap_dual_setup,
72 |                         bitmap_dual_teardown),
73 |   };
74 | 
75 |   run_benchmarks(benchmarks, sizeof(benchmarks) / sizeof(benchmarks[0]));
76 | 
77 |   return EXIT_SUCCESS;
78 | }
79 | 


--------------------------------------------------------------------------------
/tests/examples/hll-wc.c:
--------------------------------------------------------------------------------
 1 | #include <getopt.h>
 2 | #include <inttypes.h>
 3 | #include <stdio.h>
 4 | #include <stdlib.h>
 5 | #include <string.h>
 6 | 
 7 | #include <twiddle/hyperloglog/hyperloglog.h>
 8 | 
 9 | static struct option long_options[] = {{"precision", required_argument, 0, 'p'},
10 |                                        {"stream", no_argument, 0, 's'},
11 |                                        {0, 0, 0, 0}}; /* all-zero terminator */
12 | 
13 | int parse_precision(uint8_t *p)
14 | {
15 |   /* Parse optarg as a base-10 precision; store it in *p, or return -1. */
16 |   const long parsed_p = strtol(optarg, NULL, 10);
17 |   /* Accept MIN..MAX inclusive; main() still checks tw_hyperloglog_new(). */
18 |   if (parsed_p < TW_HLL_MIN_PRECISION || parsed_p > TW_HLL_MAX_PRECISION) {
19 |     return -1;
20 |   }
21 |   *p = (uint8_t)parsed_p;
22 |   return 0;
23 | }
24 | 
25 | static int parse_arguments(int argc, char **argv, uint8_t *p, bool *stream)
26 | {
27 |   /* Consume -p/--precision and -s/--stream; 0 on success, non-zero on error. */
28 |   int opt = 0;
29 |   int option_index = 0;
30 | 
31 |   /* getopt_long() hands back -1 once every option has been consumed. */
32 |   while ((opt = getopt_long(argc, argv, "p:s", long_options,
33 |                             &option_index)) != -1) {
34 |     switch (opt) {
35 |     case 'p': {
36 |       const int status = parse_precision(p);
37 |       if (status != 0) {
38 |         return status;
39 |       }
40 |       break;
41 |     }
42 |     case 's':
43 |       *stream = true;
44 |       break;
45 |     default:
46 |       /* Anything else (e.g. '?' for an unrecognized option) is an error. */
47 |       printf("?? getopt returned character code 0%o ??\n", opt);
48 |       return -1;
49 |     }
50 |   }
51 | 
52 |   return 0;
53 | }
54 | 
55 | int main(int argc, char *argv[])
56 | {
57 |   uint8_t p = 16;
58 |   bool stream = false;
59 |   /* Defaults above apply when -p/--precision or -s/--stream is not given. */
60 |   if (parse_arguments(argc, argv, &p, &stream) != 0) {
61 |     exit(-1);
62 |   }
63 | 
64 |   struct tw_hyperloglog *hll = tw_hyperloglog_new(p);
65 | 
66 |   if (!hll) {
67 |     exit(1);
68 |   }
69 | 
70 |   char *line = NULL;
71 |   size_t buf_len = 0;
72 |   ssize_t line_len = 0;
73 |   /* getline() keeps the trailing newline, so it is part of each added key. */
74 |   while ((line_len = getline(&line, &buf_len, stdin)) != -1) {
75 |     tw_hyperloglog_add(hll, line, line_len);
76 |     if (stream) {
77 |       fprintf(stdout, "%" PRIu64 "\n", (uint64_t)tw_hyperloglog_count(hll));
78 |     }
79 |   }
80 |   /* Without --stream, print a single count once stdin is exhausted. */
81 |   if (!stream) {
82 |     fprintf(stdout, "%" PRIu64 "\n", (uint64_t)tw_hyperloglog_count(hll));
83 |   }
84 | 
85 |   free(line);
86 |   tw_hyperloglog_free(hll);
87 | 
88 |   return 0;
89 | }
90 | 


--------------------------------------------------------------------------------
/tests/check/Makefile.am:
--------------------------------------------------------------------------------
 1 | ## Process this file with automake to produce Makefile.in
 2 | 
 3 | lib_LTLIBRARIES		= libcheck.la
 4 | noinst_LTLIBRARIES	= libcheckinternal.la
 5 | 
 6 | include_HEADERS		= check.h
 7 | 
 8 | EXTRA_DIST		= check.h.in
 9 | 
10 | AM_CFLAGS		= @GCOV_CFLAGS@ @PTHREAD_CFLAGS@ $(LIBSUBUNIT_CFLAGS)
11 | 
12 | CFILES =\
13 | 	check.c		\
14 | 	check_error.c	\
15 | 	check_list.c	\
16 | 	check_log.c	\
17 | 	check_msg.c	\
18 | 	check_pack.c	\
19 | 	check_print.c	\
20 | 	check_run.c	\
21 | 	check_str.c
22 | 
23 | HFILES =\
24 | 	check.h		\
25 | 	check_error.h	\
26 | 	check_impl.h	\
27 | 	check_list.h	\
28 | 	check_log.h	\
29 | 	check_msg.h	\
30 | 	check_pack.h	\
31 | 	check_print.h	\
32 | 	check_str.h
33 | 
34 | 
35 | EXPORT_SYM	= exported.sym
36 | $(EXPORT_SYM): check.h.in
37 | 	${SED} -n -e 's/^..*CK_EXPORT[[:space:]][[:space:]]*\([[:alnum:]_][[:alnum:]_]*\)..*$$/\1/p' @top_srcdir@/src/check.h.in > $@
38 | 
39 | libcheck_la_DEPENDENCIES= $(EXPORT_SYM)
40 | libcheck_la_LDFLAGS	= -no-undefined -export-symbols $(EXPORT_SYM)
41 | libcheck_la_SOURCES	= $(CFILES) $(HFILES)
42 | libcheck_la_LIBADD	= @GCOV_LIBS@ @PTHREAD_LIBS@ $(LIBSUBUNIT_LIBS) $(top_builddir)/lib/libcompat.la
43 | 
44 | libcheckinternal_la_LDFLAGS     = -no-undefined
45 | libcheckinternal_la_SOURCES	= $(CFILES) $(HFILES)
46 | libcheckinternal_la_LIBADD	= @GCOV_LIBS@ @PTHREAD_LIBS@ $(LIBSUBUNIT_LIBS) $(top_builddir)/lib/libcompat.la
47 | 
48 | CLEANFILES	= *~ *.gcno $(EXPORT_SYM)
49 | 
50 | LCOV_INPUT	= $(CFILES:%.c=.libs/%.gcda)
51 | LCOV_OUTPUT	= lcov.info
52 | LCOV_HTML	= lcov_html
53 | LCOV_LCOV	= @LCOV@
54 | LCOV_GENHTML	= @GENHTML@
55 | 
56 | lcov: $(LCOV_HTML)
57 | 
58 | $(LCOV_INPUT): libcheck.la libcheckinternal.la
59 | 	@$(MAKE) -C $(top_builddir)/tests check
60 | 
61 | $(LCOV_OUTPUT): $(LCOV_INPUT)
62 | 	$(LCOV_LCOV) --capture --directory . --base-directory . --output-file $@
63 | 
64 | $(LCOV_HTML): $(LCOV_OUTPUT)
65 | 	-$(RM) -r $@
66 | 	LANG=C $(LCOV_GENHTML) --output-directory $@ --title "Check Code Coverage" --show-details $<
67 | 	@echo "Point a web browser at $(LCOV_HTML)/index.html to see results."
68 | 
69 | clean-local: lcov-clean
70 | 
71 | .PHONY: lcov-clean
72 | lcov-clean:
73 | 	-$(RM) -r $(LCOV_HTML) $(LCOV_OUTPUT)
74 | 


--------------------------------------------------------------------------------
/tests/check/check_pack.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Check: a unit test framework for C
 3 |  * Copyright (C) 2001, 2002 Arien Malec
 4 |  *
 5 |  * This library is free software; you can redistribute it and/or
 6 |  * modify it under the terms of the GNU Lesser General Public
 7 |  * License as published by the Free Software Foundation; either
 8 |  * version 2.1 of the License, or (at your option) any later version.
 9 |  *
10 |  * This library is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 |  * Lesser General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU Lesser General Public
16 |  * License along with this library; if not, write to the
17 |  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 |  * Boston, MA 02111-1307, USA.
19 |  */
20 | 
21 | #ifndef CHECK_PACK_H
22 | #define CHECK_PACK_H
23 | 
24 | 
25 | enum ck_msg_type
26 | {
27 |     CK_MSG_CTX,
28 |     CK_MSG_FAIL,
29 |     CK_MSG_LOC,
30 |     CK_MSG_DURATION,
31 |     CK_MSG_LAST
32 | };
33 | 
34 | typedef struct CtxMsg
35 | {
36 |     enum ck_result_ctx ctx;
37 | } CtxMsg;
38 | 
39 | typedef struct LocMsg
40 | {
41 |     int line;
42 |     char *file;
43 | } LocMsg;
44 | 
45 | typedef struct FailMsg
46 | {
47 |     char *msg;
48 | } FailMsg;
49 | 
50 | typedef struct DurationMsg
51 | {
52 |     int duration;
53 | } DurationMsg;
54 | 
55 | typedef union
56 | {
57 |     CtxMsg ctx_msg;
58 |     FailMsg fail_msg;
59 |     LocMsg loc_msg;
60 |     DurationMsg duration_msg;
61 | } CheckMsg;
62 | 
63 | typedef struct RcvMsg
64 | {
65 |     enum ck_result_ctx lastctx;
66 |     enum ck_result_ctx failctx;
67 |     char *fixture_file;
68 |     int fixture_line;
69 |     char *test_file;
70 |     int test_line;
71 |     char *msg;
72 |     int duration;
73 | } RcvMsg;
74 | 
75 | void rcvmsg_free(RcvMsg * rmsg);
76 | 
77 | 
78 | int pack(enum ck_msg_type type, char **buf, CheckMsg * msg);
79 | int upack(char *buf, CheckMsg * msg, enum ck_msg_type *type);
80 | 
81 | void ppack(FILE * fdes, enum ck_msg_type type, CheckMsg * msg);
82 | RcvMsg *punpack(FILE * fdes);
83 | 
84 | #endif /*CHECK_PACK_H */
85 | 


--------------------------------------------------------------------------------
/src/twiddle/macrology.h:
--------------------------------------------------------------------------------
 1 | #ifndef TWIDDLE_INTERNAL_UTILS_H
 2 | #define TWIDDLE_INTERNAL_UTILS_H
 3 | 
 4 | #include <assert.h>
 5 | #include <float.h>
 6 | #include <math.h>
 7 | #include <stdlib.h>
 8 | #include <x86intrin.h>
 9 | 
10 | #ifndef static_assert
11 | #define static_assert _Static_assert
12 | #endif
13 | 
14 | /* Number of bytes per cache line */
15 | #ifndef TW_CACHELINE
16 | #define TW_CACHELINE 64
17 | #endif
18 | 
19 | #define TW_DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
20 | 
21 | #define TW_ALLOC_TO_CACHELINE(size)                                            \
22 |   ((TW_DIV_ROUND_UP((size), TW_CACHELINE) * TW_CACHELINE))
23 | 
24 | #define TW_ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) /* elements */
25 | 
26 | #define TW_BITOP_ADDR(x) "+m"(*(volatile long *)(x))
27 | 
28 | #define TW_BITS_IN_WORD 8
29 | #define TW_BIT_POS(x) (1 << ((x) % TW_BITS_IN_WORD))
30 | #define TW_BYTE_POS(x) ((x) / TW_BITS_IN_WORD)
31 | 
32 | #define tw_likely(x) __builtin_expect((x), 1)
33 | #define tw_unlikely(x) __builtin_expect((x), 0)
34 | 
35 | /* use with care, it evaluates twice a & b */
36 | #define tw_min(a, b) (((a) < (b)) ? (a) : (b))
37 | #define tw_max(a, b) (((a) < (b)) ? (b) : (a))
38 | 
39 | #define tw_almost_equal(a, b) (fabs((a) - (b)) < FLT_EPSILON)
40 | 
41 | /* malloc_aligned(align, size): pick the aligned allocator the libc offers. */
42 | #ifdef _ISOC11_SOURCE
43 | #define malloc_aligned aligned_alloc
44 | #else
45 | #if _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600
46 | /* posix_memalign() adapter with aligned_alloc()'s (align, size) signature. */
47 | static inline void *__aligned_alloc(size_t align, size_t size)
48 | {
49 |   void *ptr = NULL;
50 |   /* posix_memalign() reports failure through a non-zero return value. */
51 |   return posix_memalign(&ptr, align, size) ? NULL : ptr;
52 | }
53 | #define malloc_aligned __aligned_alloc
54 | #else
55 | /* No aligned allocator available; 0 (not `false`) needs no <stdbool.h>. */
56 | static_assert(0, "you're in hell.");
57 | #endif
58 | #endif
59 | 
60 | #define tw_simd_equal(a, b, simd_cmpeq, simd_maskmove, mask)                   \
61 |   ((int)(mask) == simd_maskmove(simd_cmpeq((a), (b)))) /* all lanes equal */
62 | 
63 | #define tw_mm256_equal(a, b)                                                   \
64 |   tw_simd_equal((a), (b), _mm256_cmpeq_epi8, _mm256_movemask_epi8, 0xFFFFFFFF)
65 | 
66 | #define tw_mm_equal(a, b)                                                      \
67 |   tw_simd_equal((a), (b), _mm_cmpeq_epi8, _mm_movemask_epi8, 0xFFFF)
68 | 
69 | #endif /* TWIDDLE_INTERNAL_UTILS_H */
70 | 


--------------------------------------------------------------------------------
/tests/check/check_error.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Check: a unit test framework for C
 3 |  * Copyright (C) 2001, 2002 Arien Malec
 4 |  *
 5 |  * This library is free software; you can redistribute it and/or
 6 |  * modify it under the terms of the GNU Lesser General Public
 7 |  * License as published by the Free Software Foundation; either
 8 |  * version 2.1 of the License, or (at your option) any later version.
 9 |  *
10 |  * This library is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 |  * Lesser General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU Lesser General Public
16 |  * License along with this library; if not, write to the
17 |  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 |  * Boston, MA 02111-1307, USA.
19 |  */
20 | 
21 | #include "libcompat.h"
22 | 
23 | #include <errno.h>
24 | #include <setjmp.h>
25 | #include <stdarg.h>
26 | #include <stdio.h>
27 | #include <stdlib.h>
28 | #include <string.h>
29 | 
30 | #include "check_error.h"
31 | 
32 | /**
33 |  * Storage for setjmp/longjmp context information used in NOFORK mode
34 |  */
35 | jmp_buf error_jmp_buffer;
36 | 
37 | /* FIXME: including a colon at the end is a bad way to indicate an error */
38 | __attribute__((format(printf, 1, 4))) void
39 | eprintf(const char *fmt, const char *file, int line, ...)
40 | {
41 |   va_list args;
42 |   /* Prints "file:line: <formatted message>" to stderr, then exits with 2. */
43 |   fflush(stderr);
44 | 
45 |   fprintf(stderr, "%s:%d: ", file, line);
46 |   va_start(args, line);
47 |   vfprintf(stderr, fmt, args);
48 |   va_end(args);
49 | 
50 |   /*include system error information if format ends in colon */
51 |   if (fmt[0] != '\0' && fmt[strlen(fmt) - 1] == ':')
52 |     fprintf(stderr, " %s", strerror(errno));
53 |   fprintf(stderr, "\n");
54 |   /* Never returns to the caller. */
55 |   exit(2);
56 | }
57 | 
58 | void *emalloc(size_t n)
59 | {
60 |   void *p;
61 |   /* malloc() wrapper: __LINE__ - 2 below must keep pointing at the call. */
62 |   p = malloc(n);
63 |   if (p == NULL)
64 |     eprintf("malloc of %zu bytes failed:", __FILE__, __LINE__ - 2, n);
65 |   return p;
66 | }
67 | 
68 | void *erealloc(void *ptr, size_t n)
69 | {
70 |   void *p;
71 |   /* realloc() wrapper: __LINE__ - 2 below must keep pointing at the call. */
72 |   p = realloc(ptr, n);
73 |   if (p == NULL)
74 |     eprintf("realloc of %zu bytes failed:", __FILE__, __LINE__ - 2, n);
75 |   return p;
76 | }
77 | 


--------------------------------------------------------------------------------
/tests/check/check_log.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Check: a unit test framework for C
 3 |  * Copyright (C) 2001,2002 Arien Malec
 4 |  *
 5 |  * This library is free software; you can redistribute it and/or
 6 |  * modify it under the terms of the GNU Lesser General Public
 7 |  * License as published by the Free Software Foundation; either
 8 |  * version 2.1 of the License, or (at your option) any later version.
 9 |  *
10 |  * This library is distributed in the hope that it will be useful,
11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 |  * Lesser General Public License for more details.
14 |  *
15 |  * You should have received a copy of the GNU Lesser General Public
16 |  * License along with this library; if not, write to the
17 |  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 |  * Boston, MA 02111-1307, USA.
19 |  */
20 | 
21 | #ifndef CHECK_LOG_H
22 | #define CHECK_LOG_H
23 | 
24 | void log_srunner_start(SRunner * sr);
25 | void log_srunner_end(SRunner * sr);
26 | void log_suite_start(SRunner * sr, Suite * s);
27 | void log_suite_end(SRunner * sr, Suite * s);
28 | void log_test_end(SRunner * sr, TestResult * tr);
29 | void log_test_start(SRunner * sr, TCase * tc, TF * tfun);
30 | 
31 | void stdout_lfun(SRunner * sr, FILE * file, enum print_output,
32 |                  void *obj, enum cl_event evt);
33 | 
34 | void lfile_lfun(SRunner * sr, FILE * file, enum print_output,
35 |                 void *obj, enum cl_event evt);
36 | 
37 | void xml_lfun(SRunner * sr, FILE * file, enum print_output,
38 |               void *obj, enum cl_event evt);
39 | 
40 | void tap_lfun(SRunner * sr, FILE * file, enum print_output,
41 |               void *obj, enum cl_event evt);
42 | 
43 | void subunit_lfun(SRunner * sr, FILE * file, enum print_output,
44 |                   void *obj, enum cl_event evt);
45 | 
46 | void srunner_register_lfun(SRunner * sr, FILE * lfile, int close,
47 |                            LFun lfun, enum print_output);
48 | 
49 | FILE *srunner_open_lfile(SRunner * sr);
50 | FILE *srunner_open_xmlfile(SRunner * sr);
51 | FILE *srunner_open_tapfile(SRunner * sr);
52 | void srunner_init_logging(SRunner * sr, enum print_output print_mode);
53 | void srunner_end_logging(SRunner * sr);
54 | 
55 | #endif /* CHECK_LOG_H */
56 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | libtwiddle
 2 | ==========
 3 | [![Build Status](https://travis-ci.org/fsaintjacques/libtwiddle.svg?branch=develop)](https://travis-ci.org/fsaintjacques/libtwiddle)
 4 | [![License](https://img.shields.io/badge/license-LGPL--3.0-blue.svg?style=flat)](https://github.com/fsaintjacques/libtwiddle/blob/develop/LICENSE)
 5 | 
 6 | 
 7 | libtwiddle is a data structure library aiming for speed on modern
 8 | Linux x86-64 systems. The following data structures are implemented:
 9 | 
10 |   * bitmaps (dense & RLE);
11 |   * Bloom filters (standard & active-active);
12 |   * HyperLogLog
13 |   * MinHash
14 | 
15 | See [`EXAMPLES.md`](EXAMPLES.md) and
16 | [`tests/examples/`](tests/examples/) for examples.
17 | 
18 | Why should you use libtwiddle?
19 | ------------------------------
20 | 
21 |   * Written for the modern age; `gcc >= 4.8`, using C11 features, on x86-64
22 |   * Extensive testing; unit tests and random property testing with `hypothesis`
23 |   * Python bindings; courtesy of property testing
24 |   * Vectorized implementations; AVX, AVX2, and partial support for AVX512
25 |   * Continuous integration
26 | 
27 | Install
28 | =======
29 | 
30 | libtwiddle uses CMake as its build manager.
31 | 
32 | Prerequisite libraries
33 | ----------------------
34 | 
35 | To build libtwiddle, you need the following:
36 | 
37 |   * a recent C compiler (`gcc >= 4.8` or `clang >= 3.5`).
38 |   * CMake build tool (`cmake >= 3.0`)
39 | 
40 | Building from source
41 | --------------------
42 | 
43 | In most cases, you should be able to build the source code using the following:
44 | 
45 |     $ mkdir build
46 |     $ cd build
47 |     $ cmake .. -DCMAKE_INSTALL_PREFIX=$PREFIX
48 |     $ make
49 |     $ make test
50 |     $ make install
51 | 
52 | Building with SIMD support
53 | --------------------------
54 | 
55 | By default, libtwiddle will compile with AVX SIMD instructions. Use
56 | the following flags to enable newer instructions:
57 | 
58 |   * For AVX2:   `-DUSE_AVX2=ON`;
59 |   * for AVX512: `-DUSE_AVX512=ON`.
60 | 
61 | Note that AVX2 implies AVX, and AVX512 implies AVX2. Some functions
62 | can't be implemented with AVX512, and will fall back to AVX2 code.
63 | 
64 | To compile without SIMD support, invoke CMake with `-DUSE_AVX=OFF
65 | -DUSE_AVX2=OFF -DUSE_AVX512=OFF`.
66 | 
67 | Contributions
68 | -------------
69 | 
70 | Contributions are more than welcome, see
71 | [`CONTRIBUTING.md`](CONTRIBUTING.md) for details.
72 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: required
 2 | dist: trusty
 3 | 
 4 | matrix:
 5 |   include:
 6 |     - env: GCC_VERSION=4.8
 7 |       addons: &gcc48
 8 |         apt:
 9 |           packages:
10 |             - gcc-4.8
11 |             - g++-4.8
12 |             - valgrind
13 |             - check
14 |             - pkg-config
15 |             - cmake
16 |             - cmake-data
17 |             - python-virtualenv
18 |           sources:
19 |             - ubuntu-toolchain-r-test
20 |             - george-edison55-precise-backports
21 | 
22 |     - env: GCC_VERSION=4.9
23 |       addons: &gcc49
24 |         apt:
25 |           packages:
26 |             - gcc-4.9
27 |             - g++-4.9
28 |             - valgrind
29 |             - check
30 |             - pkg-config
31 |             - cmake
32 |             - cmake-data
33 |             - python-virtualenv
34 |           sources:
35 |             - ubuntu-toolchain-r-test
36 |             - george-edison55-precise-backports
37 | 
38 |     - env: GCC_VERSION=5
39 |       addons: &gcc50
40 |         apt:
41 |           packages:
42 |             - gcc-5
43 |             - g++-5
44 |             - valgrind
45 |             - check
46 |             - pkg-config
47 |             - cmake
48 |             - cmake-data
49 |             - python-virtualenv
50 |           sources:
51 |             - ubuntu-toolchain-r-test
52 |             - george-edison55-precise-backports
53 | 
54 |     - env: CLANG_VERSION=3.5
55 |       addons: &clang35
56 |         apt:
57 |           packages:
58 |             - clang-3.5
59 |             - llvm-3.5
60 |             - valgrind
61 |             - check
62 |             - pkg-config
63 |             - cmake
64 |             - cmake-data
65 |             - python-virtualenv
66 |           sources:
67 |             - ubuntu-toolchain-r-test
68 |             - george-edison55-precise-backports
69 |             - llvm-toolchain-precise-3.5
70 | 
71 |     - env: CLANG_VERSION=3.6
72 |       addons: &clang36
73 |         apt:
74 |           packages:
75 |             - clang-3.6
76 |             - llvm-3.6
77 |             - valgrind
78 |             - check
79 |             - pkg-config
80 |             - cmake
81 |             - cmake-data
82 |             - python-virtualenv
83 |           sources:
84 |             - ubuntu-toolchain-r-test
85 |             - george-edison55-precise-backports
86 |             - llvm-toolchain-precise-3.6
87 | 
88 | 
89 | script:
90 |   - cd ${TRAVIS_BUILD_DIR} && tools/travis/test
91 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | Formatting
 2 | ==========
 3 | 
 4 | `clang-format` enforces a standard style for C code. You can find a git
 5 | pre-commit hook in `tools/git/hook-clang-format` that will ensure every source
 6 | code file respects the style.
 7 | 
 8 | Test infrastructure
 9 | ===================
10 | 
11 | libtwiddle is backed by standard unit checks implemented with the Check
12 | library. Minimal Python bindings exist to test more thoroughly with the
13 | property-testing library [Hypothesis](https://github.com/DRMacIver/hypothesis).
14 | 
15 | 
16 | When developing, we recommend using the `Debug` build type, as it
17 | will enable debugging symbols:
18 | 
19 | ```
20 | $ mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Debug ..
21 | $ make clean
22 | $ make all
23 | $ make test
24 | ...
25 | ```
26 | 
27 | One can also invoke the test suite with the `tools/travis/test` script which
28 | should create a grid of build directories: `build-{portable,avx,avx2}` and run
29 | the tests on each build directory. It should automagically detect CPU support
30 | for the required instruction sets.
31 | 
32 | To test with Hypothesis, one must prepare a virtualenv and install
33 | dependencies. This is only needed once.
34 | 
35 | ```
36 | $ # setup virtualenv for pip
37 | $ cd python && virtualenv .venv && source .venv/bin/activate
38 | $ # install dependencies
39 | $ pip install pytest hypothesis==1.19.0
40 | $ # run property tests
41 | $ PYTHONPATH=. py.test tests
42 | ```
43 | 
44 | One can also invoke the travis test script to run integration test:
45 | `RUN_HYPOTHESIS=true tools/travis/test`.
46 | 
47 | Travis
48 | ------
49 | 
50 | Travis will try to build and run the test infrastructure on each commit.
51 | To speed up feedback, integration tests are only run on pull-request branches
52 | from github.
53 | 
54 | Debugging
55 | ---------
56 | 
57 | Check forks for each test, making it hard to debug. To work around this,
58 | before entering gdb, you can do `export CK_FORK=no`
59 | and Check will stop forking.
60 | 
61 | 
62 | Adding a new data structure
63 | ===========================
64 | 
65 | Assume you want to add a new data structure with shortname `ds` in group
66 | `ds_group`.
67 | 
68 | Creating a new data structure involves the following steps:
69 | 
70 |   * add header file `include/twiddle/ds_group/ds.h`
71 |   * add implementation file `src/twiddle/ds_group/ds.c`
72 |   * update `src/CMakeLists.txt` and add reference to previous file
73 | 
74 | Then, one must implement tests:
75 | 
76 |   * create unit test in `tests/test-ds.c`
77 |   * update `tests/CMakeLists.txt` and add reference to previous file
78 |   * create usage example in `tests/examples/example-ds.c`
79 |   * update `tests/examples/CMakeLists.txt` and add reference to previous file
80 | 
81 | Finally, Python bindings and tests must be added:
82 | 
83 |   * update python-c bindings in `python/twiddle/c.py`
84 |   * add Python class in `python/twiddle/ds.py`
85 |   * add fuzzing test in `python/tests/test_ds.py`
86 | 


--------------------------------------------------------------------------------
/docs/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Fill in this with the text that you want to include in the header and footer
 2 | # of each man page.
 3 | 
 4 | set(MAN_HEADER "libtwiddle documentation")
 5 | set(MAN_FOOTER "libtwiddle")
 6 | 
 7 | # Fill this in with any man pages that should be built from a pandoc source
 8 | # file.  For a man page called foo.5, there should be a pandoc source file
 9 | # called foo.5.md.
10 | 
11 | set(MAN_PAGES
12 |   tw_bitmap.2
13 | )
14 | 
15 | #-----------------------------------------------------------------------
16 | # Everything below is boilerplate!
17 | 
18 | # Locate pandoc; PANDOC_DIR in the environment may hint at its prefix.
19 | find_program(PANDOC_EXECUTABLE
20 |     NAMES pandoc
21 |     HINTS ENV PANDOC_DIR
22 |     PATH_SUFFIXES bin
23 |     DOC "Pandoc man page generator"
24 | )
25 | 
26 | set(GENERATE_DOC TRUE CACHE BOOL "Whether to rebuild documentation")
27 | 
28 | # Without pandoc the pages cannot be regenerated, so turn generation off.
29 | if (NOT PANDOC_EXECUTABLE)
30 |     message(WARNING "Unable to find pandoc documentation generator")
31 |     set(GENERATE_DOC FALSE)
32 | endif ()
33 | 
34 | 
35 | # Link man pages go in docs/links
36 | 
37 | macro(install_links section)
38 |     # Install any pre-built pages matching links/*.<section> into man<section>.
39 |     file(GLOB links "links/*.${section}")
40 |     if (links)
41 |         install(FILES ${links}
42 |             DESTINATION "share/man/man${section}"
43 |         )
44 |     endif ()
45 | endmacro()
46 | 
47 | install_links(1)   # commands
48 | install_links(3)   # library API
49 | install_links(4)   # special files and drivers
50 | install_links(5)   # file formats and conventions
51 | install_links(7)   # miscellaneous
52 | install_links(8)   # system commands
53 | 
54 | 
55 | # Man pages with actual content go in docs
56 | 
57 | set(ALL_MANS)
58 | 
59 | macro(pandocify name)
60 |     # <name> is "<page>.<section>"; its markdown source is "<name>.md".
61 |     set(src  "${CMAKE_CURRENT_SOURCE_DIR}/${name}.md")
62 |     set(dest "${CMAKE_CURRENT_SOURCE_DIR}/${name}")
63 |     get_filename_component(section "${name}" EXT)
64 |     string(REPLACE "." "" section "${section}")
65 | 
66 |     # Regenerate the groff page from markdown only when generation is enabled.
67 |     if (GENERATE_DOC)
68 |         add_custom_command(
69 |             OUTPUT ${dest}
70 |             COMMAND ${PANDOC_EXECUTABLE}
71 |                 -f markdown -t man -s --smart
72 |                 -V header="${MAN_HEADER}" -V footer="${MAN_FOOTER}"
73 |                 -V date=${RELEASE_DATE}
74 |                 -o ${dest} ${src}
75 |             MAIN_DEPENDENCY ${src}
76 |             COMMENT "Building ${name}"
77 |         )
78 |         list(APPEND ALL_MANS ${dest})
79 |     endif ()
80 | 
81 |     # The source tree is expected to carry an already-compiled copy of every
82 |     # man page, so the install rule is registered even when no fresh copy
83 |     # was built.
84 |     install(
85 |         FILES ${dest}
86 |         DESTINATION "share/man/man${section}"
87 |     )
88 | endmacro()
89 | 
90 | foreach(page ${MAN_PAGES})
91 |     pandocify(${page})
92 | endforeach()
93 | # `doc` is part of ALL and depends on every page pandocify queued for rebuild.
94 | add_custom_target(doc ALL DEPENDS ${ALL_MANS})
95 | 


--------------------------------------------------------------------------------
/tools/cmake/FindPrereqs.cmake:
--------------------------------------------------------------------------------
 1 | #-----------------------------------------------------------------------
 2 | # Configuration options that control all of the below
 3 | 
 4 | set(PKG_CONFIG_PATH "" CACHE STRING "pkg-config search path")
 5 | if (PKG_CONFIG_PATH)
 6 |     set(ENV{PKG_CONFIG_PATH} "${PKG_CONFIG_PATH}:$ENV{PKG_CONFIG_PATH}")
 7 | endif ()
 8 | 
 9 | 
10 | #-----------------------------------------------------------------------
11 | # pkg-config prerequisites
12 | 
13 | find_package(PkgConfig)
14 | 
15 | function(pkgconfig_prereq DEP)
16 |     # Usage: pkgconfig_prereq(<module-spec> [OPTIONAL])
17 |     # Looks up DEP with pkg-config unless USE_CUSTOM_<NAME> is set, then adds
18 |     # its include and library directories for the current directory.
19 |     cmake_parse_arguments(_ "OPTIONAL" "" "" ${ARGN})
20 | 
21 |     # Derive the variable prefix: drop any version constraint, map '-' to '_'.
22 |     string(REGEX REPLACE "[<>=].*" "" SHORT_NAME "${DEP}")
23 |     string(REPLACE "-" "_" SHORT_NAME "${SHORT_NAME}")
24 |     string(TOUPPER ${SHORT_NAME} UPPER_SHORT_NAME)
25 |     string(TOLOWER ${SHORT_NAME} LOWER_SHORT_NAME)
26 | 
27 |     set(USE_CUSTOM_${UPPER_SHORT_NAME} NO CACHE BOOL
28 |         "Whether you want to provide custom details for ${LOWER_SHORT_NAME}")
29 | 
30 |     if (NOT USE_CUSTOM_${UPPER_SHORT_NAME})
31 |         if (__OPTIONAL)
32 |             set(PKG_CHECK_ARGS ${DEP})
33 |         else ()
34 |             set(PKG_CHECK_ARGS REQUIRED ${DEP})
35 |         endif ()
36 |         pkg_check_modules(${UPPER_SHORT_NAME} ${PKG_CHECK_ARGS})
37 |     endif ()
38 | 
39 |     include_directories(${${UPPER_SHORT_NAME}_INCLUDE_DIRS})
40 |     link_directories(${${UPPER_SHORT_NAME}_LIBRARY_DIRS})
41 | endfunction()
42 | 
43 | 
44 | #-----------------------------------------------------------------------
45 | # find_library prerequisites
46 | 
# library_prereq(<lib> [OPTIONAL])
#
# Locates <lib> with find_library() and publishes the result to the caller as
# <PREFIX>_STATIC_LDFLAGS, where <PREFIX> is <lib> upper-cased with '-'
# replaced by '_'. find_library() itself stores its result in the cache entry
# <PREFIX>_LDFLAGS.
#
# The cache option USE_CUSTOM_<PREFIX> disables the search. In that case the
# already-set <PREFIX>_INCLUDE_DIRS / <PREFIX>_LIBRARY_DIRS are added to the
# directory scope, and <PREFIX>_STATIC_LDFLAGS falls back to <PREFIX>_LDFLAGS
# when it has no value of its own.
#
# Unless OPTIONAL is given, a library that cannot be found is a fatal
# configure error, instead of a *-NOTFOUND value leaking into link lines.
function(library_prereq LIB_NAME)
    set(options OPTIONAL)
    set(one_args)
    set(multi_args)
    cmake_parse_arguments(_ "${options}" "${one_args}" "${multi_args}" ${ARGN})

    string(REPLACE "-" "_" SHORT_NAME "${LIB_NAME}")
    string(TOUPPER ${SHORT_NAME} UPPER_SHORT_NAME)
    string(TOLOWER ${SHORT_NAME} LOWER_SHORT_NAME)

    set(USE_CUSTOM_${UPPER_SHORT_NAME} NO CACHE BOOL
        "Whether you want to provide custom details for ${LOWER_SHORT_NAME}")

    if (USE_CUSTOM_${UPPER_SHORT_NAME})
        include_directories(${${UPPER_SHORT_NAME}_INCLUDE_DIRS})
        link_directories(${${UPPER_SHORT_NAME}_LIBRARY_DIRS})
        if (NOT ${UPPER_SHORT_NAME}_STATIC_LDFLAGS)
            set(${UPPER_SHORT_NAME}_STATIC_LDFLAGS
                ${${UPPER_SHORT_NAME}_LDFLAGS}
                PARENT_SCOPE)
        endif ()
    else ()
        find_library(${UPPER_SHORT_NAME}_LDFLAGS ${LIB_NAME})
        # find_library() leaves <VAR>-NOTFOUND behind on failure, which
        # evaluates as false here.
        if (NOT ${UPPER_SHORT_NAME}_LDFLAGS AND NOT __OPTIONAL)
            message(FATAL_ERROR
                "Could not find required library '${LIB_NAME}'. "
                "Set USE_CUSTOM_${UPPER_SHORT_NAME}=ON and provide "
                "${UPPER_SHORT_NAME}_LDFLAGS to supply it manually.")
        endif ()
        set(${UPPER_SHORT_NAME}_STATIC_LDFLAGS
            ${${UPPER_SHORT_NAME}_LDFLAGS}
            PARENT_SCOPE)
    endif ()

endfunction()
76 | 


--------------------------------------------------------------------------------
/tools/travis/test:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env bash
  2 | 
  3 | set -o errexit
  4 | set -o nounset
  5 | set -o pipefail
  6 | 
  7 | set -o xtrace
  8 | 
  9 | : ${BUILD_TYPE:=Release}
 10 | : ${RUN_VALGRIND:=false}
 11 | : ${RUN_HYPOTHESIS:=false}
 12 | : ${VERBOSE:=false}
 13 | 
 14 | function set_travis_env {
 15 | 
 16 |   VERBOSE=true
 17 | 
 18 |   if [[ -n "${CLANG_VERSION:-}" ]]; then
 19 |     export CC="clang-${CLANG_VERSION}" ;
 20 |     export CXX="clang++-${CLANG_VERSION}" ;
 21 |     export GCOV="llvm-cov-${CLANG_VERSION}" ;
 22 |   elif [[ -n "${GCC_VERSION:-}" ]]; then
 23 |     export CC="gcc-${GCC_VERSION}" ;
 24 |     export CXX="g++-${GCC_VERSION}" ;
 25 |     export GCOV="gcov-${GCC_VERSION}" ;
 26 |   fi
 27 | 
 28 |   if [[ ${TRAVIS_PULL_REQUEST:-false} != "false" ]]; then
 29 |     RUN_HYPOTHESIS=true
 30 |     RUN_VALGRIND=true
 31 |   fi
 32 | 
 33 | }
 34 | 
 35 | function run_tests {
 36 |   local name=$1
 37 | 
 38 |   local dir="build-${name}"
 39 | 
 40 |   # if you change this, modify `python/twiddle/c.py` accordingly.
 41 |   export BUILD_DIR=$dir
 42 | 
 43 |   mkdir -p "$dir"
 44 | 
 45 |   CMAKE_FLAGS="-DUSE_AVX=OFF"
 46 |   if [[ ${name:-} = "avx512" ]]; then
 47 |     CMAKE_FLAGS="-DUSE_AVX512=ON"
 48 |   elif [[ ${name:-} = "avx2" ]]; then
 49 |     CMAKE_FLAGS="-DUSE_AVX2=ON"
 50 |   elif [[ ${name:-} = "avx" ]]; then
 51 |     CMAKE_FLAGS="-DUSE_AVX=ON"
 52 |     if [[ ${RUN_VALGRIND:-false} != "false" ]]; then
 53 |       CMAKE_FLAGS="-DUSE_VALGRIND=ON ${CMAKE_FLAGS}$"
 54 |     fi
 55 |   fi
 56 | 
 57 | 
 58 |   pushd $dir
 59 | 
 60 |   if [[ -n "${CLANG_VERSION:-}" ]]; then
 61 |     #LLVM_AR=$(which llvm-ar-${CLANG_VERSION})
 62 |     #LLVM_RANLIB=$(which llvm-ranlib-${CLANG_VERSION})
 63 |     #CMAKE_FLAGS="-DCMAKE_AR=${LLVM_AR} -DCMAKE_RANLIB=${LLVM_RANLIB} ${CMAKE_FLAGS}"
 64 | 
 65 |     # I'm not wasting time on travis anymore. If you're interested in building
 66 |     # libtwiddle with clang & LTO, see the previous commented invocation.
 67 |     CMAKE_FLAGS="-DUSE_LTO=OFF ${CMAKE_FLAGS}"
 68 |   fi
 69 | 
 70 |   cmake -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
 71 |         ${CMAKE_FLAGS:-} ..
 72 | 
 73 |   # disable FORK (and incidently timeouts)
 74 |   export CK_FORK=no
 75 | 
 76 |   if [[ ${VERBOSE:-false} != "false" ]]; then
 77 |     VERBOSE_FLAG="VERBOSE=1"
 78 |   fi
 79 | 
 80 |   # unit tests
 81 |   make all test ${VERBOSE_FLAG:-}
 82 |   popd
 83 | 
 84 |   if [[ ${RUN_HYPOTHESIS} != "false" ]]; then
 85 |     # property python tests
 86 |     run_property_tests
 87 |   fi
 88 | 
 89 | }
 90 | 
 91 | function run_property_tests {
 92 |   pushd python
 93 |   py_dir=.
 94 |   venv_dir=${py_dir}/.venv
 95 |   virtualenv ${venv_dir}
 96 |   set +o nounset
 97 |   source ${venv_dir}/bin/activate
 98 |   set -o nounset
 99 |   pip install pytest hypothesis==1.19.0
100 |   PYTHONPATH=${py_dir} py.test tests
101 |   popd
102 | }
103 | 
# has_cpu_extension <ext>
#
# Succeeds when <ext> occurs anywhere in /proc/cpuinfo (substring match).
# grep reads the file directly: with `pipefail` on, a `cat | grep -q` pipeline
# can report failure when grep exits at the first match and cat is killed by
# SIGPIPE.
function has_cpu_extension {
  local ext=$1
  grep -q -- "$ext" /proc/cpuinfo
}
108 | 
# run_tests_with_extension <ext>
#
# Run the test suite for <ext> only when has_cpu_extension reports it;
# otherwise do nothing and succeed.
run_tests_with_extension() {
  local ext=$1

  has_cpu_extension "${ext}" || return 0
  run_tests "${ext}"
}
115 | 
116 | 
# Entry point: apply the Travis settings when TRAVIS is set, then run the
# portable build followed by each SIMD build the CPU reports support for.
if [[ -n "${TRAVIS:-}" ]]; then
  set_travis_env
fi

run_tests portable
for simd in avx avx2; do
  run_tests_with_extension "${simd}"
done
124 | 


--------------------------------------------------------------------------------
/src/twiddle/utils/murmur3.c:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * MurmurHash3 was written by Austin Appleby, and is placed in the public
  3 |  * domain. The author hereby disclaims copyright to this source code.
  4 |  *
  5 |  * Note - The x86 and x64 versions do _not_ produce the same results, as the
  6 |  * algorithms are optimized for their respective platforms. You can still
  7 |  * compile and run any of them on any platform, but your performance with the
  8 |  * non-native version will be less than optimal.
  9 |  */
 10 | 
 11 | #include <twiddle/utils/hash.h>
 12 | 
 13 | #include "internal.h"
 14 | 
 15 | /**
 16 |  * Block read - if your platform needs to do endian-swapping or can only
 17 |  * handle aligned reads, do the conversion here
 18 |  */
 19 | 
 20 | #define getblock(p, i) (p[i])
 21 | 
 22 | static inline uint64_t fmix64(uint64_t k)
 23 | {
 24 |   k ^= k >> 33;
 25 |   k *= BIG_CONSTANT(0xff51afd7ed558ccd);
 26 |   k ^= k >> 33;
 27 |   k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
 28 |   k ^= k >> 33;
 29 | 
 30 |   return k;
 31 | }
 32 | 
 33 | uint64_t tw_murmur3_64(const uint64_t seed, const void *key,
 34 |                        const size_t key_len)
 35 | {
 36 |   return tw_hash_128_64(tw_murmur3_128(seed, key, key_len));
 37 | }
 38 | 
 39 | tw_uint128_t tw_murmur3_128(const uint64_t seed, const void *key,
 40 |                             const size_t key_len)
 41 | {
 42 |   const uint8_t *data = (const uint8_t *)key;
 43 |   const int nblocks = key_len / 16;
 44 |   int i;
 45 | 
 46 |   uint64_t h1 = seed;
 47 |   uint64_t h2 = seed;
 48 | 
 49 |   const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
 50 |   const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
 51 | 
 52 |   const uint64_t *blocks = (const uint64_t *)(key);
 53 | 
 54 |   // clang-format off
 55 |   for (i = 0; i < nblocks; i++) {
 56 |     uint64_t k1 = getblock(blocks, i * 2 + 0);
 57 |     uint64_t k2 = getblock(blocks, i * 2 + 1);
 58 | 
 59 |     k1 *= c1; k1  = rotl64(k1,31); k1 *= c2; h1 ^= k1;
 60 |     h1 = rotl64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
 61 |     k2 *= c2; k2  = rotl64(k2,33); k2 *= c1; h2 ^= k2;
 62 |     h2 = rotl64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
 63 |   }
 64 | 
 65 |   const uint8_t *tail = (const uint8_t *)(data + nblocks * 16);
 66 | 
 67 |   uint64_t k1 = 0;
 68 |   uint64_t k2 = 0;
 69 | 
 70 |   switch (key_len & 15) {
 71 |   case 15: k2 ^= (uint64_t)(tail[14]) << 48;
 72 |   case 14: k2 ^= (uint64_t)(tail[13]) << 40;
 73 |   case 13: k2 ^= (uint64_t)(tail[12]) << 32;
 74 |   case 12: k2 ^= (uint64_t)(tail[11]) << 24;
 75 |   case 11: k2 ^= (uint64_t)(tail[10]) << 16;
 76 |   case 10: k2 ^= (uint64_t)(tail[ 9]) << 8;
 77 |   case  9: k2 ^= (uint64_t)(tail[ 8]) << 0;
 78 |            k2 *= c2; k2  = rotl64(k2,33); k2 *= c1; h2 ^= k2;
 79 | 
 80 |   case  8: k1 ^= (uint64_t)(tail[ 7]) << 56;
 81 |   case  7: k1 ^= (uint64_t)(tail[ 6]) << 48;
 82 |   case  6: k1 ^= (uint64_t)(tail[ 5]) << 40;
 83 |   case  5: k1 ^= (uint64_t)(tail[ 4]) << 32;
 84 |   case  4: k1 ^= (uint64_t)(tail[ 3]) << 24;
 85 |   case  3: k1 ^= (uint64_t)(tail[ 2]) << 16;
 86 |   case  2: k1 ^= (uint64_t)(tail[ 1]) << 8;
 87 |   case  1: k1 ^= (uint64_t)(tail[ 0]) << 0;
 88 |            k1 *= c1; k1  = rotl64(k1,31); k1 *= c2; h1 ^= k1;
 89 |   default: ;
 90 |   };
 91 | 
 92 |   h1 ^= key_len;
 93 |   h2 ^= key_len;
 94 | 
 95 |   h1 += h2;
 96 |   h2 += h1;
 97 | 
 98 |   h1 = fmix64(h1);
 99 |   h2 = fmix64(h2);
100 | 
101 |   h1 += h2;
102 |   h2 += h1;
103 |   // clang-format on
104 | 
105 |   return (tw_uint128_t){.h = h1, .l = h2};
106 | }
107 | 


--------------------------------------------------------------------------------
/python/twiddle/bloomfilter.py:
--------------------------------------------------------------------------------
  1 | from c import libtwiddle
  2 | from ctypes import c_int, c_long, pointer
  3 | 
  4 | class BloomFilter(object):
  5 |   def __init__(self, size, k, ptr=None):
  6 |     self.bloomfilter = ptr if ptr else libtwiddle.tw_bloomfilter_new(size, k)
  7 |     self.size        = size
  8 |     self.k           = k
  9 | 
 10 | 
 11 |   def __del__(self):
 12 |     if self.bloomfilter:
 13 |       libtwiddle.tw_bloomfilter_free(self.bloomfilter)
 14 | 
 15 | 
 16 |   @classmethod
 17 |   def copy(cls, b):
 18 |     return cls(b.size, b.k, ptr=libtwiddle.tw_bloomfilter_clone(b.bloomfilter))
 19 | 
 20 | 
 21 |   @classmethod
 22 |   def from_iterable(cls, size, k, iterable):
 23 |     bloomfilter = BloomFilter(size, k)
 24 | 
 25 |     for i in iterable:
 26 |       bloomfilter.set(i)
 27 | 
 28 |     return bloomfilter
 29 | 
 30 | 
 31 |   def __len__(self):
 32 |     return self.size
 33 | 
 34 | 
 35 |   def __getitem__(self, x):
 36 |     h = pointer(c_long(hash(x)))
 37 |     return libtwiddle.tw_bloomfilter_test(self.bloomfilter, h, 8)
 38 | 
 39 | 
 40 |   def set(self, x):
 41 |     h = pointer(c_long(hash(x)))
 42 |     libtwiddle.tw_bloomfilter_set(self.bloomfilter, h, 8)
 43 | 
 44 | 
 45 |   def test(self, x):
 46 |     return self[x]
 47 | 
 48 | 
 49 |   def __contains__(self, x):
 50 |     return self[x]
 51 | 
 52 | 
 53 |   def __eq__(self, other):
 54 |     if not isinstance(other, BloomFilter):
 55 |       return False
 56 | 
 57 |     return libtwiddle.tw_bloomfilter_equal(self.bloomfilter, other.bloomfilter)
 58 | 
 59 | 
 60 |   def __neg__(self):
 61 |     ret = BloomFilter.copy(self)
 62 |     libtwiddle.tw_bloomfilter_not(ret.bloomfilter)
 63 |     return ret
 64 | 
 65 | 
 66 |   def __op(self, other, func, copy=lambda x: BloomFilter.copy(x)):
 67 |     if not isinstance(other, BloomFilter):
 68 |       raise ValueError("Must compare BloomFilter to BloomFilter")
 69 | 
 70 |     if self.size != other.size:
 71 |       raise ValueError("BloomFilters must be of equal size to be comparable")
 72 | 
 73 |     ret = copy(self)
 74 | 
 75 |     func(other.bloomfilter, ret.bloomfilter)
 76 | 
 77 |     return ret
 78 | 
 79 | 
 80 |   def __iop(self, other, func):
 81 |     return self.__op(other, func, copy=lambda x: x)
 82 | 
 83 | 
 84 |   def __or__(self, other):
 85 |     return self.__op(other, libtwiddle.tw_bloomfilter_union)
 86 | 
 87 | 
 88 |   def __ior__(self, other):
 89 |     return self.__iop(other, libtwiddle.tw_bloomfilter_union)
 90 | 
 91 | 
 92 |   def __and__(self, other):
 93 |     return self.__op(other, libtwiddle.tw_bloomfilter_intersection)
 94 | 
 95 | 
 96 |   def __iand__(self, other):
 97 |     return self.__iop(other, libtwiddle.tw_bloomfilter_intersection)
 98 | 
 99 | 
100 |   def __xor__(self, other):
101 |     return self.__op(other, libtwiddle.tw_bloomfilter_xor)
102 | 
103 | 
104 |   def __ixor__(self, other):
105 |     return self.__iop(other, libtwiddle.tw_bloomfilter_xor)
106 | 
107 | 
108 |   def empty(self):
109 |     return libtwiddle.tw_bloomfilter_empty(self.bloomfilter)
110 | 
111 | 
112 |   def full(self):
113 |     return libtwiddle.tw_bloomfilter_full(self.bloomfilter)
114 | 
115 | 
116 |   def count(self):
117 |     return libtwiddle.tw_bloomfilter_count(self.bloomfilter)
118 | 
119 | 
120 |   def density(self):
121 |     return libtwiddle.tw_bloomfilter_density(self.bloomfilter)
122 | 
123 | 
124 |   def zero(self):
125 |     libtwiddle.tw_bloomfilter_zero(self.bloomfilter)
126 | 
127 | 
128 |   def fill(self):
129 |     libtwiddle.tw_bloomfilter_fill(self.bloomfilter)
130 | 


--------------------------------------------------------------------------------
/tests/check/check_list.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Check: a unit test framework for C
  3 |  * Copyright (C) 2001, 2002 Arien Malec
  4 |  *
  5 |  * This library is free software; you can redistribute it and/or
  6 |  * modify it under the terms of the GNU Lesser General Public
  7 |  * License as published by the Free Software Foundation; either
  8 |  * version 2.1 of the License, or (at your option) any later version.
  9 |  *
 10 |  * This library is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 13 |  * Lesser General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU Lesser General Public
 16 |  * License along with this library; if not, write to the
 17 |  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 18 |  * Boston, MA 02111-1307, USA.
 19 |  */
 20 | 
 21 | #include "libcompat.h"
 22 | 
 23 | #include <stdlib.h>
 24 | #include <string.h>
 25 | 
 26 | #include "check_error.h"
 27 | #include "check_list.h"
 28 | 
 29 | enum { LINIT = 1, LGROW = 2 };
 30 | 
 31 | struct List {
 32 |   unsigned int n_elts;
 33 |   unsigned int max_elts;
 34 |   int current; /* pointer to the current node */
 35 |   int last;    /* pointer to the node before END */
 36 |   void **data;
 37 | };
 38 | 
 39 | static void maybe_grow(List *lp)
 40 | {
 41 |   if (lp->n_elts >= lp->max_elts) {
 42 |     lp->max_elts *= LGROW;
 43 |     lp->data = (void **)erealloc(lp->data, lp->max_elts * sizeof(lp->data[0]));
 44 |   }
 45 | }
 46 | 
 47 | List *check_list_create(void)
 48 | {
 49 |   List *lp;
 50 | 
 51 |   lp = (List *)emalloc(sizeof(List));
 52 |   lp->n_elts = 0;
 53 |   lp->max_elts = LINIT;
 54 |   lp->data = (void **)emalloc(sizeof(lp->data[0]) * LINIT);
 55 |   lp->current = lp->last = -1;
 56 |   return lp;
 57 | }
 58 | 
 59 | void check_list_add_front(List *lp, void *val)
 60 | {
 61 |   if (lp == NULL)
 62 |     return;
 63 |   maybe_grow(lp);
 64 |   memmove(lp->data + 1, lp->data, lp->n_elts * sizeof lp->data[0]);
 65 |   lp->last++;
 66 |   lp->n_elts++;
 67 |   lp->current = 0;
 68 |   lp->data[lp->current] = val;
 69 | }
 70 | 
 71 | void check_list_add_end(List *lp, void *val)
 72 | {
 73 |   if (lp == NULL)
 74 |     return;
 75 |   maybe_grow(lp);
 76 |   lp->last++;
 77 |   lp->n_elts++;
 78 |   lp->current = lp->last;
 79 |   lp->data[lp->current] = val;
 80 | }
 81 | 
 82 | int check_list_at_end(List *lp)
 83 | {
 84 |   if (lp->current == -1)
 85 |     return 1;
 86 |   else
 87 |     return (lp->current > lp->last);
 88 | }
 89 | 
 90 | void check_list_front(List *lp)
 91 | {
 92 |   if (lp->current == -1)
 93 |     return;
 94 |   lp->current = 0;
 95 | }
 96 | 
 97 | void check_list_free(List *lp)
 98 | {
 99 |   if (lp == NULL)
100 |     return;
101 | 
102 |   free(lp->data);
103 |   free(lp);
104 | }
105 | 
106 | void *check_list_val(List *lp)
107 | {
108 |   if (lp == NULL)
109 |     return NULL;
110 |   if (lp->current == -1 || lp->current > lp->last)
111 |     return NULL;
112 | 
113 |   return lp->data[lp->current];
114 | }
115 | 
116 | void check_list_advance(List *lp)
117 | {
118 |   if (lp == NULL)
119 |     return;
120 |   if (check_list_at_end(lp))
121 |     return;
122 |   lp->current++;
123 | }
124 | 
125 | void check_list_apply(List *lp, void (*fp)(void *))
126 | {
127 |   if (lp == NULL || fp == NULL)
128 |     return;
129 | 
130 |   for (check_list_front(lp); !check_list_at_end(lp); check_list_advance(lp))
131 |     fp(check_list_val(lp));
132 | }
133 | 


--------------------------------------------------------------------------------
/python/twiddle/bitmap_rle.py:
--------------------------------------------------------------------------------
  1 | from c import libtwiddle
  2 | 
  3 | class BitmapRLE(object):
  4 |   def __init__(self, size, ptr=None):
  5 |     self.bitmap = ptr if ptr else libtwiddle.tw_bitmap_rle_new(size)
  6 |     self.size   = size
  7 | 
  8 | 
  9 |   def __del__(self):
 10 |     if self.bitmap:
 11 |       libtwiddle.tw_bitmap_rle_free(self.bitmap)
 12 | 
 13 | 
 14 |   @classmethod
 15 |   def copy(cls, b):
 16 |     return cls(b.size, ptr=libtwiddle.tw_bitmap_rle_clone(b.bitmap))
 17 | 
 18 | 
 19 |   @classmethod
 20 |   def from_indices(cls, size, indices):
 21 |     bitmap = BitmapRLE(size)
 22 | 
 23 |     for idx in sorted(indices):
 24 |       bitmap[idx] = True
 25 | 
 26 |     return bitmap
 27 | 
 28 | 
 29 |   def __len__(self):
 30 |     return self.size
 31 | 
 32 | 
 33 |   def __getitem__(self, i):
 34 |     if (i < 0) or (i >= len(self)):
 35 |       raise ValueError("index must be within bitmap bounds")
 36 |     return libtwiddle.tw_bitmap_rle_test(self.bitmap, i)
 37 | 
 38 | 
 39 |   def __setitem__(self, i, value):
 40 |     if (i < 0) or (i >= len(self)):
 41 |       raise ValueError("index must be within bitmap bounds")
 42 | 
 43 |     if not isinstance(value, bool):
 44 |       raise ValueError("BitmapRLE accepts only bool values")
 45 | 
 46 |     if value:
 47 |       libtwiddle.tw_bitmap_rle_set(self.bitmap, i)
 48 | 
 49 | 
 50 |   def __contains__(self, x):
 51 |     if (x < 0) or (x > self.size - 1):
 52 |       return False
 53 | 
 54 |     return self[x]
 55 | 
 56 | 
 57 |   def __eq__(self, other):
 58 |     if not isinstance(other, BitmapRLE):
 59 |       return False
 60 | 
 61 |     return libtwiddle.tw_bitmap_rle_equal(self.bitmap, other.bitmap)
 62 | 
 63 | 
 64 |   def __neg__(self):
 65 |     ret = BitmapRLE.copy(self)
 66 |     libtwiddle.tw_bitmap_rle_not(self.bitmap, ret.bitmap)
 67 |     return ret
 68 | 
 69 | 
 70 |   def __op(self, other, func, copy=lambda x: BitmapRLE.copy(x)):
 71 |     if not isinstance(other, BitmapRLE):
 72 |       raise ValueError("Must compare BitmapRLE to BitmapRLE")
 73 | 
 74 |     if self.size != other.size:
 75 |       raise ValueError("BitmapRLE must be of equal size to be comparable")
 76 | 
 77 |     ret = copy(self)
 78 | 
 79 |     func(self.bitmap, other.bitmap, ret.bitmap)
 80 | 
 81 |     return ret
 82 | 
 83 | 
 84 |   def __iop(self, other, func):
 85 |     return self.__op(other, func)
 86 | 
 87 | 
 88 |   def __or__(self, other):
 89 |     return self.__op(other, libtwiddle.tw_bitmap_rle_union)
 90 | 
 91 | 
 92 |   def __ior__(self, other):
 93 |     return self.__iop(other, libtwiddle.tw_bitmap_rle_union)
 94 | 
 95 | 
 96 |   def __and__(self, other):
 97 |     return self.__op(other, libtwiddle.tw_bitmap_rle_intersection)
 98 | 
 99 | 
100 |   def __iand__(self, other):
101 |     return self.__iop(other, libtwiddle.tw_bitmap_rle_intersection)
102 | 
103 | 
104 |   def empty(self):
105 |     return libtwiddle.tw_bitmap_rle_empty(self.bitmap)
106 | 
107 | 
108 |   def full(self):
109 |     return libtwiddle.tw_bitmap_rle_full(self.bitmap)
110 | 
111 | 
112 |   def count(self):
113 |     return libtwiddle.tw_bitmap_rle_count(self.bitmap)
114 | 
115 | 
116 |   def density(self):
117 |     return libtwiddle.tw_bitmap_rle_density(self.bitmap)
118 | 
119 | 
120 |   def zero(self):
121 |     libtwiddle.tw_bitmap_rle_zero(self.bitmap)
122 | 
123 | 
124 |   def fill(self):
125 |     libtwiddle.tw_bitmap_rle_fill(self.bitmap)
126 | 
127 | 
128 |   def find_first_zero(self):
129 |     return libtwiddle.tw_bitmap_rle_find_first_zero(self.bitmap)
130 | 
131 | 
132 |   def find_first_bit(self):
133 |     return libtwiddle.tw_bitmap_rle_find_first_bit(self.bitmap)
134 | 


--------------------------------------------------------------------------------
/python/twiddle/bloomfilter_a2.py:
--------------------------------------------------------------------------------
  1 | from c import libtwiddle
  2 | from ctypes import c_int, c_long, pointer
  3 | 
  4 | class BloomFilterA2(object):
  5 |   def __init__(self, size, k, density, ptr=None):
  6 |     self.bloomfilter = ptr if ptr else libtwiddle.tw_bloomfilter_a2_new(size, k, density)
  7 |     self.size        = size
  8 |     self.k           = k
  9 |     self.density     = density
 10 | 
 11 | 
 12 |   def __del__(self):
 13 |     if self.bloomfilter:
 14 |       libtwiddle.tw_bloomfilter_a2_free(self.bloomfilter)
 15 | 
 16 | 
 17 |   @classmethod
 18 |   def copy(cls, b):
 19 |     return cls(b.size, b.k, ptr=libtwiddle.tw_bloomfilter_a2_clone(b.bloomfilter))
 20 | 
 21 | 
 22 |   @classmethod
 23 |   def from_iterable(cls, size, k, density, iterable):
 24 |     bloomfilter = BloomFilterA2(size, k, density)
 25 | 
 26 |     for i in iterable:
 27 |       bloomfilter.set(i)
 28 | 
 29 |     return bloomfilter
 30 | 
 31 | 
 32 |   def __len__(self):
 33 |     return self.size
 34 | 
 35 | 
 36 |   def __getitem__(self, x):
 37 |     h = pointer(c_long(hash(x)))
 38 |     return libtwiddle.tw_bloomfilter_a2_test(self.bloomfilter, h, 8)
 39 | 
 40 | 
 41 |   def set(self, x):
 42 |     h = pointer(c_long(hash(x)))
 43 |     libtwiddle.tw_bloomfilter_a2_set(self.bloomfilter, h, 8)
 44 | 
 45 | 
 46 |   def test(self, x):
 47 |     return self[x]
 48 | 
 49 | 
 50 |   def __contains__(self, x):
 51 |     return self[x]
 52 | 
 53 | 
 54 |   def __eq__(self, other):
 55 |     if not isinstance(other, BloomFilterA2):
 56 |       return False
 57 | 
 58 |     return libtwiddle.tw_bloomfilter_a2_equal(self.bloomfilter, other.bloomfilter)
 59 | 
 60 | 
 61 |   def __neg__(self):
 62 |     ret = BloomFilterA2.copy(self)
 63 |     libtwiddle.tw_bloomfilter_a2_not(ret.bloomfilter)
 64 |     return ret
 65 | 
 66 | 
 67 |   def __op(self, other, func, copy=lambda x: BloomFilterA2.copy(x)):
 68 |     if not isinstance(other, BloomFilterA2):
 69 |       raise ValueError("Must compare BloomFilterA2 to BloomFilterA2")
 70 | 
 71 |     if self.size != other.size:
 72 |       raise ValueError("BloomFiltersA2 must be of equal size to be comparable")
 73 | 
 74 |     ret = copy(self)
 75 | 
 76 |     func(other.bloomfilter, ret.bloomfilter)
 77 | 
 78 |     return ret
 79 | 
 80 | 
 81 |   def __iop(self, other, func):
 82 |     return self.__op(other, func, copy=lambda x: x)
 83 | 
 84 | 
 85 |   def __or__(self, other):
 86 |     return self.__op(other, libtwiddle.tw_bloomfilter_a2_union)
 87 | 
 88 | 
 89 |   def __ior__(self, other):
 90 |     return self.__iop(other, libtwiddle.tw_bloomfilter_a2_union)
 91 | 
 92 | 
 93 |   def __and__(self, other):
 94 |     return self.__op(other, libtwiddle.tw_bloomfilter_a2_intersection)
 95 | 
 96 | 
 97 |   def __iand__(self, other):
 98 |     return self.__iop(other, libtwiddle.tw_bloomfilter_a2_intersection)
 99 | 
100 | 
101 |   def __xor__(self, other):
102 |     return self.__op(other, libtwiddle.tw_bloomfilter_a2_xor)
103 | 
104 | 
105 |   def __ixor__(self, other):
106 |     return self.__iop(other, libtwiddle.tw_bloomfilter_a2_xor)
107 | 
108 | 
109 |   def empty(self):
110 |     return libtwiddle.tw_bloomfilter_a2_empty(self.bloomfilter)
111 | 
112 | 
113 |   def full(self):
114 |     return libtwiddle.tw_bloomfilter_a2_full(self.bloomfilter)
115 | 
116 | 
117 |   def count(self):
118 |     return libtwiddle.tw_bloomfilter_a2_count(self.bloomfilter)
119 | 
120 | 
121 |   def density(self):
122 |     return libtwiddle.tw_bloomfilter_a2_density(self.bloomfilter)
123 | 
124 | 
125 |   def zero(self):
126 |     libtwiddle.tw_bloomfilter_a2_zero(self.bloomfilter)
127 | 
128 | 
129 |   def fill(self):
130 |     libtwiddle.tw_bloomfilter_a2_fill(self.bloomfilter)
131 | 


--------------------------------------------------------------------------------
/python/twiddle/bitmap.py:
--------------------------------------------------------------------------------
  1 | from c import libtwiddle
  2 | 
  3 | class Bitmap(object):
  4 |   def __init__(self, size, ptr=None):
  5 |     self.bitmap = ptr if ptr else libtwiddle.tw_bitmap_new(size)
  6 |     self.size   = size
  7 | 
  8 | 
  9 |   def __del__(self):
 10 |     if self.bitmap:
 11 |       libtwiddle.tw_bitmap_free(self.bitmap)
 12 | 
 13 | 
 14 |   @classmethod
 15 |   def copy(cls, b):
 16 |     return cls(b.size, ptr=libtwiddle.tw_bitmap_clone(b.bitmap))
 17 | 
 18 | 
 19 |   @classmethod
 20 |   def from_indices(cls, size, indices):
 21 |     bitmap = Bitmap(size)
 22 | 
 23 |     for idx in indices:
 24 |       bitmap[idx] = True
 25 | 
 26 |     return bitmap
 27 | 
 28 | 
 29 |   def __len__(self):
 30 |     return self.size
 31 | 
 32 | 
 33 |   def __getitem__(self, i):
 34 |     if (i < 0) or (i >= len(self)):
 35 |       raise ValueError("index must be within bitmap bounds")
 36 |     return libtwiddle.tw_bitmap_test(self.bitmap, i)
 37 | 
 38 | 
 39 |   def __setitem__(self, i, value):
 40 |     if (i < 0) or (i >= len(self)):
 41 |       raise ValueError("index must be within bitmap bounds")
 42 | 
 43 |     if not isinstance(value, bool):
 44 |       raise ValueError("Bitmap accepts only bool values")
 45 | 
 46 |     if value:
 47 |       libtwiddle.tw_bitmap_set(self.bitmap, i)
 48 |     else:
 49 |       libtwiddle.tw_bitmap_clear(self.bitmap, i)
 50 | 
 51 | 
 52 |   def __contains__(self, x):
 53 |     if (x < 0) or (x > self.size - 1):
 54 |       return False
 55 | 
 56 |     return self[x]
 57 | 
 58 | 
 59 |   def __eq__(self, other):
 60 |     if not isinstance(other, Bitmap):
 61 |       return False
 62 | 
 63 |     return libtwiddle.tw_bitmap_equal(self.bitmap, other.bitmap)
 64 | 
 65 | 
 66 |   def __neg__(self):
 67 |     ret = Bitmap.copy(self)
 68 |     libtwiddle.tw_bitmap_not(ret.bitmap)
 69 |     return ret
 70 | 
 71 | 
 72 |   def __op(self, other, func, copy=lambda x: Bitmap.copy(x)):
 73 |     if not isinstance(other, Bitmap):
 74 |       raise ValueError("Must compare Bitmap to Bitmap")
 75 | 
 76 |     if self.size != other.size:
 77 |       raise ValueError("Bitmaps must be of equal size to be comparable")
 78 | 
 79 |     ret = copy(self)
 80 | 
 81 |     func(other.bitmap, ret.bitmap)
 82 | 
 83 |     return ret
 84 | 
 85 | 
 86 |   def __iop(self, other, func):
 87 |     return self.__op(other, func, copy=lambda x: x)
 88 | 
 89 | 
 90 |   def __or__(self, other):
 91 |     return self.__op(other, libtwiddle.tw_bitmap_union)
 92 | 
 93 | 
 94 |   def __ior__(self, other):
 95 |     return self.__iop(other, libtwiddle.tw_bitmap_union)
 96 | 
 97 | 
 98 |   def __and__(self, other):
 99 |     return self.__op(other, libtwiddle.tw_bitmap_intersection)
100 | 
101 | 
102 |   def __iand__(self, other):
103 |     return self.__iop(other, libtwiddle.tw_bitmap_intersection)
104 | 
105 | 
106 |   def __xor__(self, other):
107 |     return self.__op(other, libtwiddle.tw_bitmap_xor)
108 | 
109 | 
110 |   def __ixor__(self, other):
111 |     return self.__iop(other, libtwiddle.tw_bitmap_xor)
112 | 
113 | 
114 |   def empty(self):
115 |     return libtwiddle.tw_bitmap_empty(self.bitmap)
116 | 
117 | 
118 |   def full(self):
119 |     return libtwiddle.tw_bitmap_full(self.bitmap)
120 | 
121 | 
122 |   def count(self):
123 |     return libtwiddle.tw_bitmap_count(self.bitmap)
124 | 
125 | 
126 |   def density(self):
127 |     return libtwiddle.tw_bitmap_density(self.bitmap)
128 | 
129 | 
130 |   def zero(self):
131 |     libtwiddle.tw_bitmap_zero(self.bitmap)
132 | 
133 | 
134 |   def fill(self):
135 |     libtwiddle.tw_bitmap_fill(self.bitmap)
136 | 
137 | 
138 |   def find_first_zero(self):
139 |     return libtwiddle.tw_bitmap_find_first_zero(self.bitmap)
140 | 
141 | 
142 |   def find_first_bit(self):
143 |     return libtwiddle.tw_bitmap_find_first_bit(self.bitmap)
144 | 


--------------------------------------------------------------------------------
/EXAMPLES.md:
--------------------------------------------------------------------------------
  1 | EXAMPLES
  2 | ========
  3 | 
  4 | bitmap
  5 | ------
  6 | 
  7 | ```C
  8 | #include <assert.h>
  9 | #include <twiddle/bitmap/bitmap.h>
 10 | 
 11 | int main() {
 12 |   const uint64_t nbits = 1024;
 13 |   struct tw_bitmap* bitmap = tw_bitmap_new(nbits);
 14 | 
 15 |   assert(bitmap);
 16 | 
 17 |   tw_bitmap_set(bitmap, 512);
 18 |   assert(tw_bitmap_test_and_clear(bitmap, 512));
 19 |   assert(!tw_bitmap_test(bitmap, 512));
 20 | 
 21 |   tw_bitmap_set(bitmap, 768);
 22 |   assert(tw_bitmap_find_first_bit(bitmap) == 768);
 23 | 
 24 |   tw_bitmap_free(bitmap);
 25 | 
 26 |   return 0;
 27 | }
 28 | ```
 29 | 
 30 | bitmap-rle
 31 | ------
 32 | 
 33 | ```C
 34 | #include <assert.h>
 35 | #include <twiddle/bitmap/bitmap_rle.h>
 36 | 
 37 | int main() {
 38 |   /** allocate a bitmap containing 2 billions bits */
 39 |   const uint64_t nbits = 1UL << 31;
 40 |   struct tw_bitmap_rle* bitmap = tw_bitmap_rle_new(nbits);
 41 | 
 42 |   assert(bitmap);
 43 | 
 44 |   /** fill 1 billion bits */
 45 |   const uint64_t start = 0UL;
 46 |   const uint64_t end = 1UL << 30;
 47 |   tw_bitmap_rle_set_range(bitmap, start, end);
 48 | 
 49 |   /**
 50 |    * bitmap_rle DOES NOT support setting bits in non sorted order, e.g.
 51 |    * calling the following will raise an assert() call (or undefined behaviour
 52 |    * if compiled with NDEBUG):
 53 |    *
 54 |    * tw_bitmap_rle_set(bitmap, start - 1);
 55 |    */
 56 | 
 57 |   assert(tw_bitmap_rle_test(bitmap, start));
 58 |   assert(tw_bitmap_rle_test(bitmap, end));
 59 |   assert(tw_bitmap_rle_find_first_bit(bitmap)  == (int64_t)start);
 60 |   assert(tw_bitmap_rle_find_first_zero(bitmap) == (int64_t)end + 1);
 61 | 
 62 |   tw_bitmap_rle_free(bitmap);
 63 | 
 64 |   return 0;
 65 | }
 66 | ```
 67 | 
 68 | bloomfilter
 69 | -----------
 70 | 
 71 | ```C
 72 | #include <assert.h>
 73 | #include <string.h>
 74 | 
 75 | #include <twiddle/bloomfilter/bloomfilter.h>
 76 | 
 77 | int main() {
 78 |   const uint64_t nbits = 1024;
 79 |   const uint16_t k = 7;
 80 |   struct tw_bloomfilter *bf = tw_bloomfilter_new(nbits, k);
 81 |   assert(bf);
 82 | 
 83 |   const char *values[] = {"herp", "derp", "ferp", "merp"};
 84 | 
 85 |   for (size_t i = 0; i < ((sizeof(values) / sizeof(values[0]))); ++i) {
 86 |     tw_bloomfilter_set(bf, strlen(values[i]), values[i]);
 87 |     assert(tw_bloomfilter_test(bf, strlen(values[i]), values[i]));
 88 |   }
 89 | 
 90 |   assert(!tw_bloomfilter_test(bf, sizeof("nope"), "nope"));
 91 | 
 92 |   return 0;
 93 | }
 94 | ```
 95 | 
 96 | hyperloglog
 97 | -----------
 98 | 
 99 | ```C
100 | #include <assert.h>
101 | #include <stdio.h>
102 | 
103 | #include <twiddle/hyperloglog/hyperloglog.h>
104 | 
int main() {
  const uint8_t precision = 16;
  struct tw_hyperloglog *hll = tw_hyperloglog_new(precision);
  assert(hll);

  /* Add n_elems distinct values: the raw bytes of the loop counter. */
  const uint32_t n_elems = 10 * (1 << precision);
  for (size_t i = 0; i < n_elems ; ++i) {
    tw_hyperloglog_add(hll, &i, sizeof(i));
  }

  /* n_elems is unsigned, so it is printed with %u rather than %d. */
  printf("estimated count: %f, real count: %u\n",
         tw_hyperloglog_count(hll),
         (unsigned)n_elems);

  tw_hyperloglog_free(hll);

  return 0;
}
123 | ```
124 | 
125 | minhash
126 | -----------
127 | 
128 | ```C
129 | #include <assert.h>
130 | #include <stdio.h>
131 | 
132 | #include <twiddle/hash/minhash.h>
133 | 
int main()
{
  /* Two sketches with the same number of registers: the second one is a
   * clone of the (still empty) first one. */
  const uint32_t n_registers = 1 << 13;
  struct tw_minhash *threes = tw_minhash_new(n_registers);
  assert(threes);
  struct tw_minhash *fives = tw_minhash_clone(threes);
  assert(fives);

  /* Multiples of 3 go into one sketch, multiples of 5 into the other;
   * multiples of 15 therefore land in both. */
  const uint32_t n_elems = 10 * n_registers;
  for (size_t i = 0; i < n_elems; ++i) {
    if (!(i % 3)) {
      tw_minhash_add(threes, (char *)&i, sizeof(i));
    }

    if (!(i % 5)) {
      tw_minhash_add(fives, (char *)&i, sizeof(i));
    }
  }

  printf("estimated jaccard: %f\n", tw_minhash_estimate(threes, fives));

  tw_minhash_free(fives);
  tw_minhash_free(threes);

  return 0;
}
160 | ```
161 | 


--------------------------------------------------------------------------------
/tests/check/check_str.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Check: a unit test framework for C
  3 |  * Copyright (C) 2001, 2002 Arien Malec
  4 |  *
  5 |  * This library is free software; you can redistribute it and/or
  6 |  * modify it under the terms of the GNU Lesser General Public
  7 |  * License as published by the Free Software Foundation; either
  8 |  * version 2.1 of the License, or (at your option) any later version.
  9 |  *
 10 |  * This library is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 13 |  * Lesser General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU Lesser General Public
 16 |  * License along with this library; if not, write to the
 17 |  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 18 |  * Boston, MA 02111-1307, USA.
 19 |  */
 20 | 
 21 | #include "libcompat.h"
 22 | 
 23 | #include <stdarg.h>
 24 | #include <stdio.h>
 25 | 
 26 | #include "check.h"
 27 | #include "check_error.h"
 28 | #include "check_impl.h"
 29 | #include "check_list.h"
 30 | #include "check_str.h"
 31 | 
 32 | static const char *tr_type_str(TestResult *tr);
 33 | static int percent_passed(TestStats *t);
 34 | 
 35 | char *tr_str(TestResult *tr)
 36 | {
 37 |   const char *exact_msg;
 38 |   char *rstr;
 39 | 
 40 |   exact_msg = (tr->rtype == CK_ERROR) ? "(after this point) " : "";
 41 | 
 42 |   rstr = ck_strdup_printf("%s:%d:%s:%s:%s:%d: %s%s", tr->file, tr->line,
 43 |                           tr_type_str(tr), tr->tcname, tr->tname, tr->iter,
 44 |                           exact_msg, tr->msg);
 45 | 
 46 |   return rstr;
 47 | }
 48 | 
 49 | char *tr_short_str(TestResult *tr)
 50 | {
 51 |   const char *exact_msg;
 52 |   char *rstr;
 53 | 
 54 |   exact_msg = (tr->rtype == CK_ERROR) ? "(after this point) " : "";
 55 | 
 56 |   rstr =
 57 |       ck_strdup_printf("%s:%d: %s%s", tr->file, tr->line, exact_msg, tr->msg);
 58 | 
 59 |   return rstr;
 60 | }
 61 | 
 62 | char *sr_stat_str(SRunner *sr)
 63 | {
 64 |   char *str;
 65 |   TestStats *ts;
 66 | 
 67 |   ts = sr->stats;
 68 | 
 69 |   str = ck_strdup_printf("%d%%: Checks: %d, Failures: %d, Errors: %d",
 70 |                          percent_passed(ts), ts->n_checked, ts->n_failed,
 71 |                          ts->n_errors);
 72 | 
 73 |   return str;
 74 | }
 75 | 
 76 | __attribute__((format(printf, 1, 2))) char *ck_strdup_printf(const char *fmt,
 77 |                                                              ...)
 78 | {
 79 |   /* Guess we need no more than 100 bytes. */
 80 |   int n;
 81 |   size_t size = 100;
 82 |   char *p;
 83 |   va_list ap;
 84 | 
 85 |   p = (char *)emalloc(size);
 86 | 
 87 |   while (1) {
 88 |     /* Try to print in the allocated space. */
 89 |     va_start(ap, fmt);
 90 |     n = vsnprintf(p, size, fmt, ap);
 91 |     va_end(ap);
 92 |     /* If that worked, return the string. */
 93 |     if (n > -1 && n < (int)size)
 94 |       return p;
 95 | 
 96 |     /* Else try again with more space. */
 97 |     if (n > -1)             /* C99 conform vsnprintf() */
 98 |       size = (size_t)n + 1; /* precisely what is needed */
 99 |     else                    /* glibc 2.0 */
100 |       size *= 2;            /* twice the old size */
101 | 
102 |     p = (char *)erealloc(p, size);
103 |   }
104 | }
105 | 
106 | static const char *tr_type_str(TestResult *tr)
107 | {
108 |   const char *str = NULL;
109 | 
110 |   if (tr->ctx == CK_CTX_TEST) {
111 |     if (tr->rtype == CK_PASS)
112 |       str = "P";
113 |     else if (tr->rtype == CK_FAILURE)
114 |       str = "F";
115 |     else if (tr->rtype == CK_ERROR)
116 |       str = "E";
117 |   } else
118 |     str = "S";
119 | 
120 |   return str;
121 | }
122 | 
123 | static int percent_passed(TestStats *t)
124 | {
125 |   if (t->n_failed == 0 && t->n_errors == 0)
126 |     return 100;
127 |   else if (t->n_checked == 0)
128 |     return 0;
129 |   else
130 |     return (int)((float)(t->n_checked - (t->n_failed + t->n_errors)) /
131 |                  (float)t->n_checked * 100);
132 | }
133 | 


--------------------------------------------------------------------------------
/tests/benchmarks/benchmark.h:
--------------------------------------------------------------------------------
  1 | #ifndef TESTS_BENCHMARKS_BENCHMARK_H_
  2 | #define TESTS_BENCHMARKS_BENCHMARK_H_
  3 | 
  4 | #include <assert.h>
  5 | #include <stdio.h>
  6 | 
/*
 * Description of one benchmark, as consumed by run_benchmark().
 */
struct benchmark {
  /* label printed in front of every measurement */
  const char *name;

  /* divisor applied to the measured cycle count before it is printed */
  size_t size;
  /* number of repetitions */
  size_t repeat;
  /* skip the first `skip` results for warmup */
  size_t skip;

  /* user data handed to the `benchmark` callback on every repetition */
  void *opaque;

  /* optional: called once before the repetitions when non-NULL */
  void (*setup)(struct benchmark *);
  /* the timed callback */
  void (*benchmark)(void *);
  /* optional: called once after the repetitions when non-NULL */
  void (*teardown)(struct benchmark *);
};
 22 | 
/*
 * Build a `struct benchmark` compound literal for the function `bench`.
 *
 * The stringified function name becomes the label, and `skip` is set to 5%
 * of `b_repeat` (truncated to an integer). `opaque` is not named in the
 * initializer; presumably `b_setup` is expected to fill it in when the
 * benchmark needs state.
 */
#define BENCHMARK_FIXTURE(bench, b_repeat, b_size, b_setup, b_teardown)        \
  ((struct benchmark){.name = #bench,                                          \
                      .size = (b_size),                                        \
                      .repeat = (b_repeat),                                    \
                      .skip = (size_t)((b_repeat)*0.05),                       \
                      .benchmark = bench,                                      \
                      .setup = b_setup,                                        \
                      .teardown = b_teardown})

/* Same as BENCHMARK_FIXTURE without setup and teardown callbacks. */
#define BENCHMARK(bench, b_repeat, b_size)                                     \
  BENCHMARK_FIXTURE(bench, b_repeat, b_size, NULL, NULL)
 34 | 
 35 | void run_benchmark(struct benchmark *b);
 36 | 
 37 | void run_benchmarks(struct benchmark *b, size_t size)
 38 | {
 39 |   assert(b && size);
 40 | 
 41 |   for (size_t i = 0; i < size; ++i) {
 42 |     run_benchmark(&b[i]);
 43 |   }
 44 | }
 45 | 
 46 | #define RDTSC_START(cycles)                                                    \
 47 |   do {                                                                         \
 48 |     register uint32_t cyc_high, cyc_low;                                       \
 49 |     __asm volatile("cpuid\n\t"                                                 \
 50 |                    "rdtsc\n\t"                                                 \
 51 |                    "mov %%edx, %0\n\t"                                         \
 52 |                    "mov %%eax, %1\n\t"                                         \
 53 |                    : "=r"(cyc_high), "=r"(cyc_low)::"%rax", "%rbx", "%rcx",    \
 54 |                      "%rdx");                                                  \
 55 |     (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;                           \
 56 |   } while (0)
 57 | 
 58 | #define RDTSC_FINAL(cycles)                                                    \
 59 |   do {                                                                         \
 60 |     register uint32_t cyc_high, cyc_low;                                       \
 61 |     __asm volatile("rdtscp\n\t"                                                \
 62 |                    "mov %%edx, %0\n\t"                                         \
 63 |                    "mov %%eax, %1\n\t"                                         \
 64 |                    "cpuid\n\t"                                                 \
 65 |                    : "=r"(cyc_high), "=r"(cyc_low)::"%rax", "%rbx", "%rcx",    \
 66 |                      "%rdx");                                                  \
 67 |     (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;                           \
 68 |   } while (0)
 69 | 
 70 | void run_benchmark(struct benchmark *b)
 71 | {
 72 |   assert(b);
 73 | 
 74 |   const size_t repeat = b->repeat;
 75 |   const size_t size = b->size;
 76 |   const size_t skip = b->skip;
 77 |   const char *name = b->name;
 78 | 
 79 |   if (b->setup) {
 80 |     b->setup(b);
 81 |   }
 82 | 
 83 |   for (size_t i = 0; i < repeat; i++) {
 84 |     uint64_t cycles_start, cycles_final;
 85 | 
 86 |     __asm volatile("" ::: /* pretend to clobber */ "memory");
 87 | 
 88 |     RDTSC_START(cycles_start);
 89 | 
 90 |     b->benchmark(b->opaque);
 91 | 
 92 |     RDTSC_FINAL(cycles_final);
 93 | 
 94 |     if (i >= skip) {
 95 |       printf("%s,%.2F\n", name,
 96 |              ((double)(cycles_final - cycles_start) / (double)size));
 97 |     }
 98 |   }
 99 | 
100 |   if (b->teardown) {
101 |     b->teardown(b);
102 |   }
103 | }
104 | 
105 | #endif /* TESTS_BENCHMARKS_BENCHMARK_H_ */
106 | 


--------------------------------------------------------------------------------
/tests/check/libcompat.h:
--------------------------------------------------------------------------------
  1 | #ifndef LIBCOMPAT_H
  2 | #define LIBCOMPAT_H
  3 | 
/*
 * GCC_VERSION_AT_LEAST(major, minor) is non-zero when the compiler defines
 * __GNUC__/__GNUC_MINOR__ and reports a version of at least major.minor.
 * It is 0 for compilers that do not define those macros.
 */
#if defined(__GNUC__) && defined(__GNUC_MINOR__)
#define GCC_VERSION_AT_LEAST(major, minor)                                     \
  ((__GNUC__ > (major)) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#else
#define GCC_VERSION_AT_LEAST(major, minor) 0
#endif

/* Marks an entity as possibly unused; expands to nothing before GCC 2.95. */
#if GCC_VERSION_AT_LEAST(2, 95)
#define CK_ATTRIBUTE_UNUSED __attribute__((unused))
#else
#define CK_ATTRIBUTE_UNUSED
#endif /* GCC 2.95 */

/* Marks a function that does not return; expands to nothing before GCC 2.5. */
#if GCC_VERSION_AT_LEAST(2, 5)
#define CK_ATTRIBUTE_NORETURN __attribute__((noreturn))
#else
#define CK_ATTRIBUTE_NORETURN
#endif /* GCC 2.5 */

/*
 * Used for MSVC to create the export attribute
 * CK_DLL_EXP is defined during the compilation of the library
 * on the command line. When it is not defined it expands to nothing.
 */
#ifndef CK_DLL_EXP
#define CK_DLL_EXP
#endif
 31 | 
 32 | /* defines size_t */
 33 | #include <sys/types.h>
 34 | 
 35 | /* provides assert */
 36 | #include <assert.h>
 37 | 
 38 | /* defines FILE */
 39 | #include <stdio.h>
 40 | 
 41 | /* defines exit() */
 42 | #include <stdlib.h>
 43 | 
 44 | /* provides localtime and struct tm */
 45 | #include <sys/time.h>
 46 | #include <time.h>
 47 | 
 48 | /* declares fork(), _POSIX_VERSION.  according to Autoconf.info,
 49 |    unistd.h defines _POSIX_VERSION if the system is POSIX-compliant,
 50 |    so we will use this as a test for all things uniquely provided by
 51 |    POSIX like sigaction() and fork() */
 52 | #ifdef HAVE_UNISTD_H
 53 | #include <unistd.h>
 54 | #endif
 55 | 
 56 | #ifdef HAVE_SYS_WAIT_H
 57 | #include <sys/wait.h>
 58 | #endif
 59 | 
 60 | /* declares pthread_create and friends */
 61 | #ifdef HAVE_PTHREAD
 62 | #include <pthread.h>
 63 | #endif
 64 | 
 65 | #ifdef HAVE_STDINT_H
 66 | #include <stdint.h>
 67 | #endif
 68 | 
 69 | /*
 70 |  * On systems where clock_gettime() is not available, or
 71 |  * on systems where some clocks may not be supported, the
 72 |  * definition for CLOCK_MONOTONIC and CLOCK_REALTIME may not
 73 |  * be available. These should define which type of clock
 74 |  * clock_gettime() should use. We define it here if it is
 75 |  * not defined simply so the reimplementation can ignore it.
 76 |  *
 77 |  * We set the values of these clocks to some (hopefully)
 78 |  * invalid value, to avoid the case where we define a
 79 |  * clock with a valid value, and unintentionally use
 80 |  * an actual good clock by accident.
 81 |  */
/* Fallback clock identifiers, only defined when the system has none. */
#ifndef CLOCK_MONOTONIC
#define CLOCK_MONOTONIC -1
#endif
#ifndef CLOCK_REALTIME
#define CLOCK_REALTIME -1
#endif

/* Everything up to the matching #endif is only declared without librt. */
#ifndef HAVE_LIBRT

#ifdef STRUCT_TIMESPEC_DEFINITION_MISSING
/*
 * The following structure is defined in POSIX 1003.1 for times
 * specified in seconds and nanoseconds. If it is not defined in
 * time.h, then we need to define it here.
 */
struct timespec {
  time_t tv_sec;
  long tv_nsec;
};
#endif /* STRUCT_TIMESPEC_DEFINITION_MISSING */

#ifdef STRUCT_ITIMERSPEC_DEFINITION_MISSING
/*
 * The following structure is defined in POSIX.1b for timer start values and
 * intervals.
 * If it is not defined in time.h, then we need to define it here.
 */
struct itimerspec {
  struct timespec it_interval;
  struct timespec it_value;
};
#endif /* STRUCT_ITIMERSPEC_DEFINITION_MISSING */

/*
 * Do a simple forward declaration in case the struct is not defined.
 * In the versions of timer_create in libcompat, sigevent is never
 * used.
 */
struct sigevent;

/* Declarations of the clock and timer functions used when librt is absent. */
CK_DLL_EXP int clock_gettime(clockid_t clk_id, struct timespec *ts);
CK_DLL_EXP int timer_create(clockid_t clockid, struct sigevent *sevp,
                            timer_t *timerid);
CK_DLL_EXP int timer_settime(timer_t timerid, int flags,
                             const struct itimerspec *new_value,
                             struct itimerspec *old_value);
CK_DLL_EXP int timer_delete(timer_t timerid);
#endif /* HAVE_LIBRT */

/* silence warnings about an empty library */
CK_DLL_EXP void ck_do_nothing(void) CK_ATTRIBUTE_NORETURN;
133 | 
134 | #endif /* !LIBCOMPAT_H */
135 | 


--------------------------------------------------------------------------------
/tests/check/check_impl.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Check: a unit test framework for C
  3 |  * Copyright (C) 2001,2002 Arien Malec
  4 |  *
  5 |  * This library is free software; you can redistribute it and/or
  6 |  * modify it under the terms of the GNU Lesser General Public
  7 |  * License as published by the Free Software Foundation; either
  8 |  * version 2.1 of the License, or (at your option) any later version.
  9 |  *
 10 |  * This library is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 13 |  * Lesser General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU Lesser General Public
 16 |  * License along with this library; if not, write to the
 17 |  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 18 |  * Boston, MA 02111-1307, USA.
 19 |  */
 20 | 
 21 | #ifndef CHECK_IMPL_H
 22 | #define CHECK_IMPL_H
 23 | 
 24 | #include "check_list.h"
 25 | 
 26 | /* This header should be included by any module that needs
 27 |    to know the implementation details of the check structures
 28 |    Include stdio.h, time.h, & list.h before this header
 29 | */
 30 | 
/* Microseconds per second; the scale factor used by DIFF_IN_USEC. */
#define US_PER_SEC 1000000
/* Nanoseconds per second. */
#define NANOS_PER_SECONDS 1000000000

/**
 * Calculate the difference in useconds out of two "struct timespec"s.
 * Each tv_nsec is truncated to whole microseconds before the subtraction,
 * and both arguments are evaluated twice.
 */
#define DIFF_IN_USEC(begin, end)                                               \
  ((((end).tv_sec - (begin).tv_sec) * US_PER_SEC) + ((end).tv_nsec / 1000) -   \
   ((begin).tv_nsec / 1000))
 38 | 
/*
 * One test function entry.
 * NOTE(review): the per-field notes are read off the field names only;
 * confirm against the code that fills the structure in.
 */
typedef struct TF {
  TFun fn;
  int loop_start; /* presumably the first iteration value of a looping test */
  int loop_end;   /* presumably the end of the iteration range */
  const char *name;
  int signal;     /* presumably the signal the test is expected to raise */
  signed char allowed_exit_value;
} TF;

/* A named suite. */
struct Suite {
  const char *name;
  List *tclst; /* List of test cases */
};

/* A fixture function together with a flag. */
typedef struct Fixture {
  int ischecked; /* presumably selects checked vs. unchecked handling */
  SFun fun;
} Fixture;

/*
 * A named test case: a timeout, its test functions and four fixture lists.
 * NOTE(review): "unch"/"ch" presumably stand for unchecked/checked and
 * "sf"/"tf" for setup/teardown fixtures; confirm.
 */
struct TCase {
  const char *name;
  struct timespec timeout;
  List *tflst; /* list of test functions */
  List *unch_sflst;
  List *unch_tflst;
  List *ch_sflst;
  List *ch_tflst;
};

/* Counters of a run; sr_stat_str() formats them into the summary line. */
typedef struct TestStats {
  int n_checked;
  int n_failed;
  int n_errors;
} TestStats;
 73 | 
/* Outcome of a single test; tr_str()/tr_short_str() turn it into text. */
struct TestResult {
  enum test_result rtype; /* Type of result */
  enum ck_result_ctx ctx; /* When the result occurred */
  char *file;             /* File where the test occurred */
  int line;               /* Line number where the test occurred */
  int iter;               /* The iteration value for looping tests */
  int duration;           /* duration of this test in microseconds */
  const char *tcname;     /* Test case that generated the result */
  const char *tname;      /* Test that generated the result */
  char *msg;              /* Failure message */
};

/* Lifecycle helpers for TestResult objects (defined elsewhere). */
TestResult *tr_create(void);
void tr_reset(TestResult *tr);
void tr_free(TestResult *tr);
 89 | 
/* Events passed to a logging callback (the last argument of LFun). */
enum cl_event {
  CLINITLOG_SR, /* Initialize log file */
  CLENDLOG_SR,  /* Tests are complete */
  CLSTART_SR,   /* Suite runner start */
  CLSTART_S,    /* Suite start */
  CLEND_SR,     /* Suite runner end */
  CLEND_S,      /* Suite end */
  CLSTART_T,    /* A test case is about to run */
  CLEND_T       /* Test case end */
};

/*
 * Signature of a logging callback: the suite runner, an output stream, the
 * print mode, an opaque object and the event being reported.
 */
typedef void (*LFun)(SRunner *, FILE *, enum print_output, void *,
                     enum cl_event);

/* One log sink: a stream, the callback writing to it and its print mode. */
typedef struct Log {
  FILE *lfile;
  LFun lfun;
  int close; /* presumably non-zero when lfile has to be closed; confirm */
  enum print_output mode;
} Log;
110 | 
/* State of a suite runner: the suites to run, statistics, results, logs. */
struct SRunner {
  List *slst;             /* List of Suite objects */
  TestStats *stats;       /* Run statistics */
  List *resultlst;        /* List of unit test results */
  const char *log_fname;  /* name of log file */
  const char *xml_fname;  /* name of xml output file */
  const char *tap_fname;  /* name of tap output file */
  List *loglst;           /* list of Log objects */
  enum fork_status fstat; /* controls if suites are forked or not
                             NOTE: Don't use this value directly,
                             instead use srunner_fork_status */
};

/* Accessors for the current fork status (defined elsewhere). */
void set_fork_status(enum fork_status fstat);
enum fork_status cur_fork_status(void);

/* Returns a clock identifier (defined elsewhere). */
clockid_t check_get_clockid(void);
128 | 
129 | #endif /* CHECK_IMPL_H */
130 | 


--------------------------------------------------------------------------------
/tests/examples/bf-uniq.c:
--------------------------------------------------------------------------------
  1 | #include <getopt.h>
  2 | #include <stdio.h>
  3 | #include <stdlib.h>
  4 | #include <string.h>
  5 | #include <time.h>
  6 | 
  7 | #include "../../src/twiddle/macrology.h"
  8 | #include <twiddle/bloomfilter/bloomfilter.h>
  9 | 
/*
 * Long option table handed to getopt_long() in parse_arguments(). Every
 * option requires an argument and maps onto the short option with the same
 * letter; the all-zero entry terminates the table.
 */
static struct option long_options[] = {
    {"probability", required_argument, 0, 'p'},
    {"count", required_argument, 0, 'n'},
    {"duration", required_argument, 0, 'd'},
    {0, 0, 0, 0}};
 15 | 
 16 | static int parse_probability(float *p)
 17 | {
 18 |   const float parsed_p = strtof(optarg, NULL);
 19 |   if (!(0 < parsed_p && parsed_p <= 1)) {
 20 |     return false;
 21 |   }
 22 | 
 23 |   *p = parsed_p;
 24 | 
 25 |   return true;
 26 | }
 27 | 
 28 | static bool parse_count(int64_t *n)
 29 | {
 30 |   const int64_t parsed_n = strtoll(optarg, NULL, 10);
 31 |   if (parsed_n < 0) {
 32 |     return false;
 33 |   }
 34 | 
 35 |   *n = parsed_n;
 36 | 
 37 |   return true;
 38 | }
 39 | 
 40 | static bool apply_time_suffix(float *x, char suffix_char)
 41 | {
 42 |   int multiplier;
 43 | 
 44 |   switch (suffix_char) {
 45 |   case 0:
 46 |   case 's':
 47 |     multiplier = 1;
 48 |     break;
 49 |   case 'm':
 50 |     multiplier = 60;
 51 |     break;
 52 |   case 'h':
 53 |     multiplier = 60 * 60;
 54 |     break;
 55 |   case 'd':
 56 |     multiplier = 60 * 60 * 24;
 57 |     break;
 58 |   default:
 59 |     return false;
 60 |   }
 61 | 
 62 |   *x *= multiplier;
 63 | 
 64 |   return true;
 65 | }
 66 | 
 67 | static bool parse_duration(const char *str, float *d)
 68 | {
 69 |   char *ep;
 70 |   float duration = strtof(str, &ep);
 71 | 
 72 |   if (!(0 <= duration)
 73 |       /* No extra chars after the number and an optional s,m,h,d char.  */
 74 |       || (*ep && *(ep + 1))
 75 |       /* Check any suffix char and update timeout based on the suffix.  */
 76 |       || !apply_time_suffix(&duration, *ep)) {
 77 |     return false;
 78 |   }
 79 | 
 80 |   *d = duration;
 81 | 
 82 |   return true;
 83 | }
 84 | 
 85 | static int parse_arguments(int argc, char **argv, int64_t *n, float *p,
 86 |                            float *d)
 87 | {
 88 |   int c = 0;
 89 | 
 90 |   while (1) {
 91 |     int option_index = 0;
 92 | 
 93 |     c = getopt_long(argc, argv, "n:p:d:", long_options, &option_index);
 94 |     if (c == -1)
 95 |       break;
 96 | 
 97 |     switch (c) {
 98 |     case 'n':
 99 |       if (!parse_count(n)) {
100 |         return -1;
101 |       }
102 |       break;
103 |     case 'p':
104 |       if (!parse_probability(p)) {
105 |         return -1;
106 |       }
107 |       break;
108 |     case 'd':
109 |       if (!parse_duration(optarg, d)) {
110 |         return -1;
111 |       }
112 |       break;
113 |     default:
114 |       printf("?? getopt returned character code 0%o ??\n", c);
115 |       return -1;
116 |     }
117 |   }
118 | 
119 |   return 0;
120 | }
121 | 
/*
 * Fallback for Apple SDKs that do not provide clock_gettime().
 *
 * The shim is only compiled when <time.h> (included above) did not already
 * define CLOCK_MONOTONIC; on SDKs that do, the system declaration is used
 * and redefining it here would conflict with it.
 *
 * The clock id is ignored: the time always comes from gettimeofday(), i.e.
 * from the wall clock.
 */
#if defined(__APPLE__) && !defined(CLOCK_MONOTONIC)
#include <sys/time.h>
#define CLOCK_MONOTONIC 0
int clock_gettime(int __attribute__((unused)) clk_id, struct timespec *t)
{
  struct timeval now;
  int rv = gettimeofday(&now, NULL);
  if (rv)
    return rv;
  t->tv_sec = now.tv_sec;
  /* microseconds to nanoseconds */
  t->tv_nsec = now.tv_usec * 1000;
  return 0;
}
#endif
136 | 
137 | int main(int argc, char *argv[])
138 | {
139 |   int64_t n = 1000000;
140 |   float p = 0.0001;
141 |   float c = -1.0;
142 | 
143 |   if (parse_arguments(argc, argv, &n, &p, &c) != 0) {
144 |     exit(-1);
145 |   }
146 | 
147 |   const uint64_t m = tw_bloomfilter_optimal_m(n, p);
148 |   const uint16_t k = tw_bloomfilter_optimal_k(n, m);
149 | 
150 |   /* parse options */
151 | 
152 |   struct tw_bloomfilter *bf = tw_bloomfilter_new(m, k);
153 | 
154 |   if (!bf) {
155 |     exit(1);
156 |   }
157 | 
158 |   char *line = NULL;
159 |   size_t buf_len = 0;
160 |   ssize_t line_len = 0;
161 | 
162 |   /* setup expire */
163 |   struct timespec next_expire = {0, 0};
164 |   clock_gettime(CLOCK_MONOTONIC, &next_expire);
165 |   next_expire.tv_sec += c;
166 | 
167 |   while ((line_len = getline(&line, &buf_len, stdin)) != -1) {
168 | 
169 |     if (tw_unlikely(c > 0)) {
170 |       struct timespec now = {0, 0};
171 |       clock_gettime(CLOCK_MONOTONIC, &now);
172 | 
173 |       if (tw_unlikely(now.tv_sec >= next_expire.tv_sec)) {
174 |         next_expire.tv_sec += c;
175 |         tw_bloomfilter_zero(bf);
176 |       }
177 |     }
178 | 
179 |     if (!tw_bloomfilter_test(bf, line, line_len)) {
180 |       fprintf(stdout, "%s", line);
181 |       tw_bloomfilter_set(bf, line, line_len);
182 |     }
183 |   }
184 | 
185 |   free(line);
186 | 
187 |   tw_bloomfilter_free(bf);
188 | 
189 |   return 0;
190 | }
191 | 


--------------------------------------------------------------------------------
/src/twiddle/hyperloglog/hyperloglog_simd.c:
--------------------------------------------------------------------------------
  1 | #include <math.h>
  2 | #include <x86intrin.h>
  3 | 
  4 | #include <twiddle/hyperloglog/hyperloglog.h>
  5 | 
  6 | #ifdef USE_AVX2
/* http://stackoverflow.com/questions/13219146/how-to-sum-m256-horizontally */
/*
 * Sum the eight floats of `x` by halving three times:
 * 8 -> 4 (upper 128 bits onto lower), 4 -> 2 (upper pair onto lower pair),
 * 2 -> 1 (element 1 onto element 0). Returns the lowest element.
 */
static inline float horizontal_sum_avx2(__m256 x)
{
  const __m128 hi_quad = _mm256_extractf128_ps(x, 1);
  const __m128 lo_quad = _mm256_castps256_ps128(x);
  const __m128 sum_quad = _mm_add_ps(lo_quad, hi_quad);
  const __m128 lo_dual = sum_quad;
  const __m128 hi_dual = _mm_movehl_ps(sum_quad, sum_quad);
  const __m128 sum_dual = _mm_add_ps(lo_dual, hi_dual);
  const __m128 lo = sum_dual;
  const __m128 hi = _mm_shuffle_ps(sum_dual, sum_dual, 0x1);
  const __m128 sum = _mm_add_ss(lo, hi);
  return _mm_cvtss_f32(sum);
}
 21 | 
/*
 * Number of bytes equal to zero in a 256-bit vector: compare every byte with
 * zero, gather the per-byte results into a 32-bit mask and count its set
 * bits.
 */
#define _mm256_cntz_epi8(simd)                                                 \
  __builtin_popcount(                                                          \
      _mm256_movemask_epi8(_mm256_cmpeq_epi8(simd, _mm256_setzero_si256())))

/*
 * Bit patterns of the floats 2^-v for the eight lowest bytes v of the 128-bit
 * vector `simd`. Each byte is widened to 32 bits and shifted into the
 * exponent field of a single precision float (bit 23 upwards), then
 * subtracted from `ones`. The macro expects a variable named `ones` in the
 * expanding scope that holds the bit pattern of 1.0f in every lane.
 */
#define inverse_power_avx2(simd)                                               \
  _mm256_sub_epi32(ones, _mm256_slli_epi32(_mm256_cvtepu8_epi32(simd), 23))
 28 | 
/*
 * AVX2 kernel: sum 2^-register over the registers and count the registers
 * that are zero.
 *
 * Only whole 32-byte vectors are processed (n_registers / 32 of them); the
 * vectors are read with an aligned load, so `registers` has to be 32-byte
 * aligned.
 *
 * The zero count is added to *n_zeros, while *inverse_sum is overwritten
 * with the sum.
 * NOTE(review): hyperloglog_count_port() adds to *inverse_sum instead of
 * overwriting it; the two only agree when the caller passes in 0 -- confirm
 * that this is what callers do.
 */
static inline void hyperloglog_count_avx2(const uint8_t *registers,
                                          uint32_t n_registers,
                                          float *inverse_sum, uint32_t *n_zeros)
{
  /* bit pattern of 1.0f in every lane, used by inverse_power_avx2() */
  const __m256i ones = (__m256i)_mm256_set1_ps(1.0f);
  __m256 agg = _mm256_set1_ps(0.0f);

  for (size_t i = 0; i < n_registers / sizeof(__m256i); ++i) {
    const __m256i simd = _mm256_load_si256((__m256i *)registers + i);
    /* For some reason, VPSRLDQ works on lane of 128bits instead of 256. */
    const __m128i low = _mm256_extracti128_si256(simd, 0);
    const __m128i high = _mm256_extracti128_si256(simd, 1);

    /* bytes 0..7 */
    __m256i sums = inverse_power_avx2(low);
    agg = _mm256_add_ps(agg, (__m256)sums);

    /* bytes 8..15 */
    sums = inverse_power_avx2(_mm_srli_si128(low, 8));
    agg = _mm256_add_ps(agg, (__m256)sums);

    /* bytes 16..23 */
    sums = inverse_power_avx2(high);
    agg = _mm256_add_ps(agg, (__m256)sums);

    /* bytes 24..31 */
    sums = inverse_power_avx2(_mm_srli_si128(high, 8));
    agg = _mm256_add_ps(agg, (__m256)sums);

    *n_zeros += _mm256_cntz_epi8(simd);
  }

  *inverse_sum = horizontal_sum_avx2(agg);
}
 59 | 
 60 | #elif defined USE_AVX
 61 | 
/*
 * Sum the four floats of `x`: two rounds of pairwise horizontal adds leave
 * the total in every element, of which the lowest is returned.
 */
static inline float horizontal_sum_avx(__m128 x)
{
  x = _mm_hadd_ps(x, x);
  x = _mm_hadd_ps(x, x);
  return _mm_cvtss_f32(x);
}
 68 | 
/*
 * Number of bytes equal to zero in a 128-bit vector: compare every byte with
 * zero, gather the per-byte results into a 16-bit mask and count its set
 * bits.
 */
#define _mm_cntz_epi8(simd)                                                    \
  __builtin_popcount(                                                          \
      _mm_movemask_epi8(_mm_cmpeq_epi8(simd, _mm_setzero_si128())))

/*
 * Bit patterns of the floats 2^-v for the four lowest bytes v of `simd`.
 * Each byte is widened to 32 bits and shifted into the exponent field of a
 * single precision float (bit 23 upwards), then subtracted from `ones`. The
 * macro expects a variable named `ones` in the expanding scope that holds
 * the bit pattern of 1.0f in every lane.
 */
#define inverse_power_avx(simd)                                                \
  _mm_sub_epi32(ones, _mm_slli_epi32(_mm_cvtepu8_epi32(simd), 23))
 75 | 
/*
 * 128-bit kernel: sum 2^-register over the registers and count the registers
 * that are zero.
 *
 * Only whole 16-byte vectors are processed (n_registers / 16 of them); the
 * vectors are read with an aligned load, so `registers` has to be 16-byte
 * aligned.
 *
 * The zero count is added to *n_zeros, while *inverse_sum is overwritten
 * with the sum.
 * NOTE(review): hyperloglog_count_port() adds to *inverse_sum instead of
 * overwriting it; the two only agree when the caller passes in 0 -- confirm
 * that this is what callers do.
 */
static inline void hyperloglog_count_avx(const uint8_t *registers,
                                         uint32_t n_registers,
                                         float *inverse_sum, uint32_t *n_zeros)
{
  /* bit pattern of 1.0f in every lane, used by inverse_power_avx() */
  const __m128i ones = (__m128i)_mm_set1_ps(1.0f);
  __m128 agg = _mm_set1_ps(0.0f);

  for (size_t i = 0; i < n_registers / sizeof(__m128i); ++i) {
    const __m128i simd = _mm_load_si128((__m128i *)registers + i);

    /* bytes 0..3 */
    __m128i powers = inverse_power_avx(simd);
    agg = _mm_add_ps(agg, (__m128)powers);

    /* bytes 4..7 */
    powers = inverse_power_avx(_mm_srli_si128(simd, 4));
    agg = _mm_add_ps(agg, (__m128)powers);

    /* bytes 8..11 */
    powers = inverse_power_avx(_mm_srli_si128(simd, 8));
    agg = _mm_add_ps(agg, (__m128)powers);

    /* bytes 12..15 */
    powers = inverse_power_avx(_mm_srli_si128(simd, 12));
    agg = _mm_add_ps(agg, (__m128)powers);

    *n_zeros += _mm_cntz_epi8(simd);
  }

  *inverse_sum = horizontal_sum_avx(agg);
}
103 | 
104 | #endif
105 | 
106 | static inline void hyperloglog_count_port(const uint8_t *registers,
107 |                                           uint32_t n_registers,
108 |                                           float *inverse_sum, uint32_t *n_zeros)
109 | 
110 | {
111 |   for (size_t i = 0; i < n_registers; ++i) {
112 |     const uint8_t val = registers[i];
113 |     *inverse_sum += powf(2, -1.0 * val);
114 |     if (val == 0) {
115 |       *n_zeros += 1;
116 |     }
117 |   }
118 | }
119 | 


--------------------------------------------------------------------------------
/src/twiddle/utils/metrohash.c:
--------------------------------------------------------------------------------
  1 | #include <twiddle/utils/hash.h>
  2 | 
  3 | #include "../macrology.h"
  4 | #include "internal.h"
  5 | 
/* Multiplicative constants of the 64-bit hash below. */
static const uint64_t k0_64 = 0xD6D018F5;
static const uint64_t k1_64 = 0xA2AA033B;
static const uint64_t k2_64 = 0x62992FC1;
static const uint64_t k3_64 = 0x30BC5B29;

/*
 * Hash the `key_len` bytes at `key` into 64 bits, parameterized by `seed`.
 *
 * The input is consumed front to back: first in 32-byte blocks mixed into
 * four lanes (only when at least 32 bytes are available), then at most one
 * chunk each of 16, 8, 4, 2 and 1 bytes, followed by a final mixing step.
 * The cread_* and rotr64 helpers are not defined in this file (presumably
 * they come from "internal.h").
 */
uint64_t tw_metrohash_64(const uint64_t seed, const void *key,
                         const size_t key_len)
{
  const uint8_t *ptr = (uint8_t *)key;
  const uint8_t *const end = ptr + key_len;

  uint64_t h = (seed + k2_64) * k0_64;

  // clang-format off
  if (key_len >= 32) {
    /* four lanes, all starting from the seeded state */
    uint64_t v[4];
    v[0] = h;
    v[1] = h;
    v[2] = h;
    v[3] = h;

    /* one iteration per 32-byte block, 8 bytes per lane */
    do {
      v[0] += cread_u64(ptr) * k0_64; ptr += 8; v[0] = rotr64(v[0],29) + v[2];
      v[1] += cread_u64(ptr) * k1_64; ptr += 8; v[1] = rotr64(v[1],29) + v[3];
      v[2] += cread_u64(ptr) * k2_64; ptr += 8; v[2] = rotr64(v[2],29) + v[0];
      v[3] += cread_u64(ptr) * k3_64; ptr += 8; v[3] = rotr64(v[3],29) + v[1];
    } while (ptr <= (end - 32));

    /* fold the four lanes back into h */
    v[2] ^= rotr64(((v[0] + v[3]) * k0_64) + v[1], 37) * k1_64;
    v[3] ^= rotr64(((v[1] + v[2]) * k1_64) + v[0], 37) * k0_64;
    v[0] ^= rotr64(((v[0] + v[2]) * k0_64) + v[3], 37) * k1_64;
    v[1] ^= rotr64(((v[1] + v[3]) * k1_64) + v[2], 37) * k0_64;
    h += v[0] ^ v[1];
  }

  /* fewer than 32 bytes remain from here on */
  if ((end - ptr) >= 16) {
    uint64_t v0 = h + (cread_u64(ptr) * k2_64); ptr += 8; v0 = rotr64(v0,29) * k3_64;
    uint64_t v1 = h + (cread_u64(ptr) * k2_64); ptr += 8; v1 = rotr64(v1,29) * k3_64;
    v0 ^= rotr64(v0 * k0_64, 21) + v1;
    v1 ^= rotr64(v1 * k3_64, 21) + v0;
    h += v1;
  }

  if ((end - ptr) >= 8) {
    h += cread_u64(ptr) * k3_64; ptr += 8;
    h ^= rotr64(h, 55) * k1_64;
  }

  if ((end - ptr) >= 4) {
    h += cread_u32(ptr) * k3_64; ptr += 4;
    h ^= rotr64(h, 26) * k1_64;
  }

  if ((end - ptr) >= 2) {
    h += cread_u16(ptr) * k3_64; ptr += 2;
    h ^= rotr64(h, 48) * k1_64;
  }

  if ((end - ptr) >= 1) {
    h += cread_u8(ptr) * k3_64;
    h ^= rotr64(h, 37) * k1_64;
  }

  /* final mixing of the accumulated state */
  h ^= rotr64(h, 28);
  h *= k0_64;
  h ^= rotr64(h, 29);
  // clang-format on

  return h;
}
 76 | 
/*
 * Constants used by tw_metrohash_128: multipliers in the mixing steps, also
 * added to / subtracted from the seed when the initial lanes are derived.
 */
static const uint64_t k0_128 = 0xC83A91E1;
static const uint64_t k1_128 = 0x8648DBDB;
static const uint64_t k2_128 = 0x7BDEC03B;
static const uint64_t k3_128 = 0x2F5870A5;
 81 | 
 82 | tw_uint128_t tw_metrohash_128(const uint64_t seed, const void *key,
 83 |                               size_t key_len)
 84 | {
 85 |   const uint8_t *ptr = (uint8_t *)key;
 86 |   const uint8_t *const end = ptr + key_len;
 87 | 
 88 |   uint64_t v[4];
 89 | 
 90 |   // clang-format off
 91 |   v[0] = (seed - k0_128) * k3_128;
 92 |   v[1] = (seed + k1_128) * k2_128;
 93 | 
 94 |   if (key_len >= 32) {
 95 |     v[2] = (seed + k0_128) * k2_128;
 96 |     v[3] = (seed - k1_128) * k3_128;
 97 | 
 98 |     do {
 99 |       v[0] += cread_u64(ptr) * k0_128; ptr += 8; v[0] = rotr64(v[0],29) + v[2];
100 |       v[1] += cread_u64(ptr) * k1_128; ptr += 8; v[1] = rotr64(v[1],29) + v[3];
101 |       v[2] += cread_u64(ptr) * k2_128; ptr += 8; v[2] = rotr64(v[2],29) + v[0];
102 |       v[3] += cread_u64(ptr) * k3_128; ptr += 8; v[3] = rotr64(v[3],29) + v[1];
103 |     } while (ptr <= (end - 32));
104 | 
105 |     v[2] ^= rotr64(((v[0] + v[3]) * k0_128) + v[1], 21) * k1_128;
106 |     v[3] ^= rotr64(((v[1] + v[2]) * k1_128) + v[0], 21) * k0_128;
107 |     v[0] ^= rotr64(((v[0] + v[2]) * k0_128) + v[3], 21) * k1_128;
108 |     v[1] ^= rotr64(((v[1] + v[3]) * k1_128) + v[2], 21) * k0_128;
109 |   }
110 | 
111 |   if ((end - ptr) >= 16) {
112 |     v[0] += cread_u64(ptr) * k2_128; ptr += 8; v[0] = rotr64(v[0],33) * k3_128;
113 |     v[1] += cread_u64(ptr) * k2_128; ptr += 8; v[1] = rotr64(v[1],33) * k3_128;
114 |     v[0] ^= rotr64((v[0] * k2_128) + v[1], 45) * k1_128;
115 |     v[1] ^= rotr64((v[1] * k3_128) + v[0], 45) * k0_128;
116 |   }
117 | 
118 |   if ((end - ptr) >= 8) {
119 |     v[0] += cread_u64(ptr) * k2_128; ptr += 8; v[0] = rotr64(v[0],33) * k3_128;
120 |     v[0] ^= rotr64((v[0] * k2_128) + v[1], 27) * k1_128;
121 |   }
122 | 
123 |   if ((end - ptr) >= 4) {
124 |     v[1] += cread_u32(ptr) * k2_128; ptr += 4; v[1] = rotr64(v[1],33) * k3_128;
125 |     v[1] ^= rotr64((v[1] * k3_128) + v[0], 46) * k0_128;
126 |   }
127 | 
128 |   if ((end - ptr) >= 2) {
129 |     v[0] += cread_u16(ptr) * k2_128; ptr += 2; v[0] = rotr64(v[0],33) * k3_128;
130 |     v[0] ^= rotr64((v[0] * k2_128) + v[1], 22) * k1_128;
131 |   }
132 | 
133 |   if ((end - ptr) >= 1) {
134 |     v[1] += cread_u8(ptr) * k2_128; v[1] = rotr64(v[1],33) * k3_128;
135 |     v[1] ^= rotr64((v[1] * k3_128) + v[0], 58) * k0_128;
136 |   }
137 | 
138 |   v[0] += rotr64((v[0] * k0_128) + v[1], 13);
139 |   v[1] += rotr64((v[1] * k1_128) + v[0], 37);
140 |   v[0] += rotr64((v[0] * k2_128) + v[1], 13);
141 |   v[1] += rotr64((v[1] * k3_128) + v[0], 37);
142 |   // clang-format on
143 | 
144 |   return (tw_uint128_t){.h = v[0], .l = v[1]};
145 | }
146 | 


--------------------------------------------------------------------------------
/tests/test.h:
--------------------------------------------------------------------------------
 1 | #include "check/check.h"
 2 | #include <inttypes.h>
 3 | #include <stdio.h>
 4 | 
 5 | #undef _ck_assert_ptr
 6 | #define _ck_assert_ptr(X, OP, Y)                                               \
 7 |   do {                                                                         \
 8 |     const void *_ck_x = (X);                                                   \
 9 |     const void *_ck_y = (Y);                                                   \
10 |     ck_assert_msg(_ck_x OP _ck_y,                                              \
11 |                   "Assertion '%s' failed: %s == %#x, %s == %#x",               \
12 |                   #X " " #OP " " #Y, #X, _ck_x, #Y, _ck_y);                    \
13 |   } while (0)
14 | 
/*
 * Print the name of the enclosing function to stderr, prefixed with "--- ".
 * Note that the expansion already ends with a semicolon.
 */
#define DESCRIBE_TEST fprintf(stderr, "--- %s\n", __func__);
16 | 
/*
 * Generic typed comparison assertion.
 *
 * X and Y are each evaluated once into locals of `type`, compared with OP,
 * and on failure both values are reported using the printf conversion
 * `typefmt`. `typefmt` must be a string literal without the leading `%`
 * (e.g. one of the <inttypes.h> PRI* macros), since it is pasted into the
 * format string right after a `%`.
 */
#define _ck_assert_type(type, typefmt, X, OP, Y)                               \
  do {                                                                         \
    type _ck_x = (X);                                                          \
    type _ck_y = (Y);                                                          \
    ck_assert_msg(_ck_x OP _ck_y, "Assertion '%s' failed: %s == %" typefmt     \
                                  " , %s == %" typefmt,                        \
                  #X " " #OP " " #Y, #X, _ck_x, #Y, _ck_y);                    \
  } while (0)
25 | 
/*
 * Typed assertion families built on _ck_assert_type. For every fixed-width
 * integer type there is one macro per comparison: eq (==), ne (!=), lt (<),
 * gt (>), le (<=) and ge (>=). Both operands are converted to the named type
 * before being compared and printed.
 */

/* uint8_t comparisons */
#define ck_assert_uint8_t_eq(X, Y) _ck_assert_type(uint8_t, PRIu8, X, ==, Y)
#define ck_assert_uint8_t_ne(X, Y) _ck_assert_type(uint8_t, PRIu8, X, !=, Y)
#define ck_assert_uint8_t_lt(X, Y) _ck_assert_type(uint8_t, PRIu8, X, <, Y)
#define ck_assert_uint8_t_gt(X, Y) _ck_assert_type(uint8_t, PRIu8, X, >, Y)
#define ck_assert_uint8_t_le(X, Y) _ck_assert_type(uint8_t, PRIu8, X, <=, Y)
#define ck_assert_uint8_t_ge(X, Y) _ck_assert_type(uint8_t, PRIu8, X, >=, Y)

/* uint16_t comparisons */
#define ck_assert_uint16_t_eq(X, Y) _ck_assert_type(uint16_t, PRIu16, X, ==, Y)
#define ck_assert_uint16_t_ne(X, Y) _ck_assert_type(uint16_t, PRIu16, X, !=, Y)
#define ck_assert_uint16_t_lt(X, Y) _ck_assert_type(uint16_t, PRIu16, X, <, Y)
#define ck_assert_uint16_t_gt(X, Y) _ck_assert_type(uint16_t, PRIu16, X, >, Y)
#define ck_assert_uint16_t_le(X, Y) _ck_assert_type(uint16_t, PRIu16, X, <=, Y)
#define ck_assert_uint16_t_ge(X, Y) _ck_assert_type(uint16_t, PRIu16, X, >=, Y)

/* uint32_t comparisons */
#define ck_assert_uint32_t_eq(X, Y) _ck_assert_type(uint32_t, PRIu32, X, ==, Y)
#define ck_assert_uint32_t_ne(X, Y) _ck_assert_type(uint32_t, PRIu32, X, !=, Y)
#define ck_assert_uint32_t_lt(X, Y) _ck_assert_type(uint32_t, PRIu32, X, <, Y)
#define ck_assert_uint32_t_gt(X, Y) _ck_assert_type(uint32_t, PRIu32, X, >, Y)
#define ck_assert_uint32_t_le(X, Y) _ck_assert_type(uint32_t, PRIu32, X, <=, Y)
#define ck_assert_uint32_t_ge(X, Y) _ck_assert_type(uint32_t, PRIu32, X, >=, Y)

/* uint64_t comparisons */
#define ck_assert_uint64_t_eq(X, Y) _ck_assert_type(uint64_t, PRIu64, X, ==, Y)
#define ck_assert_uint64_t_ne(X, Y) _ck_assert_type(uint64_t, PRIu64, X, !=, Y)
#define ck_assert_uint64_t_lt(X, Y) _ck_assert_type(uint64_t, PRIu64, X, <, Y)
#define ck_assert_uint64_t_gt(X, Y) _ck_assert_type(uint64_t, PRIu64, X, >, Y)
#define ck_assert_uint64_t_le(X, Y) _ck_assert_type(uint64_t, PRIu64, X, <=, Y)
#define ck_assert_uint64_t_ge(X, Y) _ck_assert_type(uint64_t, PRIu64, X, >=, Y)

/* int8_t comparisons */
#define ck_assert_int8_t_eq(X, Y) _ck_assert_type(int8_t, PRId8, X, ==, Y)
#define ck_assert_int8_t_ne(X, Y) _ck_assert_type(int8_t, PRId8, X, !=, Y)
#define ck_assert_int8_t_lt(X, Y) _ck_assert_type(int8_t, PRId8, X, <, Y)
#define ck_assert_int8_t_gt(X, Y) _ck_assert_type(int8_t, PRId8, X, >, Y)
#define ck_assert_int8_t_le(X, Y) _ck_assert_type(int8_t, PRId8, X, <=, Y)
#define ck_assert_int8_t_ge(X, Y) _ck_assert_type(int8_t, PRId8, X, >=, Y)

/* int16_t comparisons */
#define ck_assert_int16_t_eq(X, Y) _ck_assert_type(int16_t, PRId16, X, ==, Y)
#define ck_assert_int16_t_ne(X, Y) _ck_assert_type(int16_t, PRId16, X, !=, Y)
#define ck_assert_int16_t_lt(X, Y) _ck_assert_type(int16_t, PRId16, X, <, Y)
#define ck_assert_int16_t_gt(X, Y) _ck_assert_type(int16_t, PRId16, X, >, Y)
#define ck_assert_int16_t_le(X, Y) _ck_assert_type(int16_t, PRId16, X, <=, Y)
#define ck_assert_int16_t_ge(X, Y) _ck_assert_type(int16_t, PRId16, X, >=, Y)

/* int32_t comparisons */
#define ck_assert_int32_t_eq(X, Y) _ck_assert_type(int32_t, PRId32, X, ==, Y)
#define ck_assert_int32_t_ne(X, Y) _ck_assert_type(int32_t, PRId32, X, !=, Y)
#define ck_assert_int32_t_lt(X, Y) _ck_assert_type(int32_t, PRId32, X, <, Y)
#define ck_assert_int32_t_gt(X, Y) _ck_assert_type(int32_t, PRId32, X, >, Y)
#define ck_assert_int32_t_le(X, Y) _ck_assert_type(int32_t, PRId32, X, <=, Y)
#define ck_assert_int32_t_ge(X, Y) _ck_assert_type(int32_t, PRId32, X, >=, Y)

/* int64_t comparisons */
#define ck_assert_int64_t_eq(X, Y) _ck_assert_type(int64_t, PRId64, X, ==, Y)
#define ck_assert_int64_t_ne(X, Y) _ck_assert_type(int64_t, PRId64, X, !=, Y)
#define ck_assert_int64_t_lt(X, Y) _ck_assert_type(int64_t, PRId64, X, <, Y)
#define ck_assert_int64_t_gt(X, Y) _ck_assert_type(int64_t, PRId64, X, >, Y)
#define ck_assert_int64_t_le(X, Y) _ck_assert_type(int64_t, PRId64, X, <=, Y)
#define ck_assert_int64_t_ge(X, Y) _ck_assert_type(int64_t, PRId64, X, >=, Y)
81 | 
82 | /*
83 | #undef _ck_assert_type_all
84 | #undef _ck_assert_type
85 | */
86 | 


--------------------------------------------------------------------------------
/include/twiddle/hash/minhash.h:
--------------------------------------------------------------------------------
  1 | #ifndef TWIDDLE_HASH_MINHASH_H
  2 | #define TWIDDLE_HASH_MINHASH_H
  3 | 
  4 | #include <stdbool.h>
  5 | #include <stdint.h>
  6 | 
  7 | /**
  8 |  * minhash data structure
  9 |  *
 10 |  * (source https://en.wikipedia.org/wiki/MinHash)
 11 |  *
 12 |  * MinHash (or the min-wise independent permutations locality sensitive hashing
 13 |  * scheme) is a technique for quickly estimating how similar two sets are. The
 14 |  * term MinHash was coined by Andrei Broder (1997), see [1]. This is also known
 15 |  * as the `k-mins` sketch.
 16 |  *
 17 |  * The simplest version of the MinHash scheme uses `k` different hash functions,
 18 |  * where `k` is a fixed integer parameter, and represents each set `S` by the
 19 |  * `k` values of `h_min(S)` for these `k` functions.
 20 |  *
 21 |  * To estimate `Jaccard(A, B)` using this version of the scheme, let `y` be the
 22 |  * number of hash functions for which `h_min(A) = h_min(B)`, and use `y/k` as
 23 |  * the estimate. This estimate is the average of `k` different 0-1 random
 24 |  * variables, each of which is one when `h_min(A) = h_min(B)` and zero
 25 |  * otherwise, and each of which is an unbiased estimator of `Jaccard(A, B)`.
 26 |  * Therefore, their average is also an unbiased estimator, and by standard
 27 |  * Chernoff bounds for sums of 0-1 random variables, its expected error is
 28 |  * `O(1/√k)`.
 29 |  *
 30 |  * Therefore, for any constant `ε > 0` there is a constant `k = O(1/ε^2)` such
 31 |  * that the expected error of the estimate is at most `ε`. For example, 400
 32 |  * hashes would be required to estimate `Jaccard(A, B)` with an expected error
 33 |  * less than or equal to .05.
 34 |  *
 35 |  * This implementation uses 32bits registers. It also uses max instead
 36 |  * of min, since it's homomorphic.
 37 |  *
 38 |  * [1] Broder, Andrei Z. "On the resemblance and containment of documents."
 39 |  * Compression and Complexity of Sequences 1997. Proceedings. IEEE, 1997.
 40 |  */
 41 | struct tw_minhash {
 42 |   /** number of registers, also the number of hash functions */
 43 |   uint32_t n_registers;
 44 |   /** registers holding computed values */
 45 |   uint32_t *registers;
 46 | };
 47 | 
 48 | /**
 49 |  * Allocate a `struct tw_minhash`.
 50 |  *
 51 |  * The allocation will be rounded up to the closest multiple of a
 52 |  * cacheline.
 53 |  *
 * @param n_registers strictly positive number of 32bit registers the structure
 55 |  *                    holds
 56 |  *
 57 |  * @return `NULL` if allocation failed, otherwise a pointer to the newly
 58 |  *         allocated `struct tw_minhash`.
 59 |  *
 60 |  * @note group:minhash
 61 |  */
 62 | struct tw_minhash *tw_minhash_new(uint32_t n_registers);
 63 | 
 64 | /**
 65 |  * Free a `struct tw_minhash`.
 66 |  *
 67 |  * @param hash to free
 68 |  *
 69 |  * @note group:minhash
 70 |  */
 71 | void tw_minhash_free(struct tw_minhash *hash);
 72 | 
 73 | /**
 74 |  * Copy a source `struct tw_minhash` into a specified destination.
 75 |  *
 76 |  * @param src non-null minhash to copy from
 77 |  * @param dst non-null minhash to copy to
 78 |  *
 79 |  * @return `NULL` if any hash is null or not of the same cardinality, otherwise
 80 |  *         a pointer to dst
 81 |  *
 82 |  * @note group:minhash
 83 |  */
 84 | struct tw_minhash *tw_minhash_copy(const struct tw_minhash *src,
 85 |                                    struct tw_minhash *dst);
 86 | 
 87 | /**
 88 |  * Clone a `struct tw_minhash` into a newly allocated one.
 89 |  *
 90 |  * @param hash non-null minhash to clone
 91 |  *
 92 |  * @return `NULL` if failed, otherwise a newly allocated minhash initialized
 *         from the requested minhash. The caller is responsible to deallocate
 94 |  *         the minhash with tw_minhash_free
 95 |  *
 96 |  * @note group:minhash
 97 |  */
 98 | struct tw_minhash *tw_minhash_clone(const struct tw_minhash *hash);
 99 | 
100 | /**
101 |  * Add an element into a `struct tw_minhash`.
102 |  *
103 |  * @param hash non-null minhash to add
104 |  * @param key non-null buffer of the key to add
 * @param key_size strictly positive size of the buffer of the key to add
106 |  *
107 |  * @note group:minhash
108 |  */
109 | void tw_minhash_add(struct tw_minhash *hash, const void *key, size_t key_size);
110 | 
111 | /**
112 |  * Estimate the jaccard index between two `struct tw_minhash`s.
113 |  *
114 |  * @param fst non-null first minhash
115 |  * @param snd non-null second minhash
116 |  *
117 |  * @return `0.0` if any hash is null or hashes are not of the same cardinality,
118 |  *         otherwise the estimated jaccard index between `fst` and `snd`
119 |  *
120 |  * @note group:minhash
121 |  */
122 | float tw_minhash_estimate(const struct tw_minhash *fst,
123 |                           const struct tw_minhash *snd);
124 | 
125 | /**
126 |  * Verify if `struct tw_minhash`s are equal.
127 |  *
128 |  * @param fst non-null first minhash
129 |  * @param snd non-null second minhash
130 |  *
 * @return `false` if any hash is null or hashes are not of the same
 *         cardinality, otherwise indicator if hashes are equal
133 |  *
134 |  * @note group:minhash
135 |  */
136 | bool tw_minhash_equal(const struct tw_minhash *fst,
137 |                       const struct tw_minhash *snd);
138 | 
139 | /**
140 |  * Merge a `struct tw_minhash` in a specified destination.
141 |  *
142 |  * @param src non-null minhash to merge from
143 |  * @param dst non-null minhash to merge to
144 |  *
145 |  * @return `NULL` if any hash is null or hashes are not of the same cardinality,
146 |  *         otherwise pointer to dst with merged registers
147 |  *
148 |  * @note group:minhash
149 |  */
150 | struct tw_minhash *tw_minhash_merge(const struct tw_minhash *src,
151 |                                     struct tw_minhash *dst);
152 | 
153 | #endif /* TWIDDLE_HASH_MINHASH_H */
154 | 


--------------------------------------------------------------------------------
/src/twiddle/bloomfilter/bloomfilter.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | 
  3 | #include <twiddle/bitmap/bitmap.h>
  4 | #include <twiddle/bloomfilter/bloomfilter.h>
  5 | #include <twiddle/utils/hash.h>
  6 | #include <twiddle/utils/projection.h>
  7 | 
/* Seed passed to tw_metrohash_128 whenever a key is hashed by set/test. */
#define TW_BF_DEFAULT_SEED 3781869495ULL
  9 | 
 10 | struct tw_bloomfilter *tw_bloomfilter_new(uint64_t size, uint16_t k)
 11 | {
 12 |   if (!size || size > TW_BITMAP_MAX_BITS || !k) {
 13 |     return NULL;
 14 |   }
 15 | 
 16 |   struct tw_bloomfilter *bf = calloc(1, sizeof(struct tw_bloomfilter));
 17 |   if (!bf) {
 18 |     return NULL;
 19 |   }
 20 | 
 21 |   bf->bitmap = tw_bitmap_new(size);
 22 |   if (!(bf->bitmap)) {
 23 |     free(bf);
 24 |     return NULL;
 25 |   }
 26 | 
 27 |   bf->k = k;
 28 | 
 29 |   return bf;
 30 | }
 31 | 
 32 | void tw_bloomfilter_free(struct tw_bloomfilter *bf)
 33 | {
 34 |   if (!bf) {
 35 |     return;
 36 |   }
 37 | 
 38 |   tw_bitmap_free(bf->bitmap);
 39 |   free(bf);
 40 | }
 41 | 
 42 | struct tw_bloomfilter *tw_bloomfilter_copy(const struct tw_bloomfilter *src,
 43 |                                            struct tw_bloomfilter *dst)
 44 | {
 45 |   if (!src || !dst || dst->bitmap->size != src->bitmap->size) {
 46 |     return NULL;
 47 |   }
 48 | 
 49 |   dst->k = src->k;
 50 | 
 51 |   if (!tw_bitmap_copy(src->bitmap, dst->bitmap)) {
 52 |     return NULL;
 53 |   }
 54 | 
 55 |   return dst;
 56 | }
 57 | 
 58 | struct tw_bloomfilter *tw_bloomfilter_clone(const struct tw_bloomfilter *bf)
 59 | {
 60 |   if (!bf) {
 61 |     return NULL;
 62 |   }
 63 | 
 64 |   struct tw_bloomfilter *new = tw_bloomfilter_new(bf->bitmap->size, bf->k);
 65 |   if (!new) {
 66 |     return NULL;
 67 |   }
 68 | 
 69 |   return tw_bloomfilter_copy(bf, new);
 70 | }
 71 | 
 72 | void tw_bloomfilter_set(struct tw_bloomfilter *bf, const void *key,
 73 |                         size_t key_size)
 74 | {
 75 |   if (!bf || !key || !key_size) {
 76 |     return;
 77 |   }
 78 | 
 79 |   const tw_uint128_t hash = tw_metrohash_128(TW_BF_DEFAULT_SEED, key, key_size);
 80 |   const uint16_t k = bf->k;
 81 |   struct tw_bitmap *bitmap = bf->bitmap;
 82 |   const uint64_t b_size = bitmap->size;
 83 | 
 84 |   for (size_t i = 0; i < k; ++i) {
 85 |     const uint64_t hash_fn_i = hash.h + (i * hash.l);
 86 |     const uint64_t idx = tw_projection_mul_64(hash_fn_i, b_size);
 87 |     tw_bitmap_set(bitmap, idx);
 88 |   }
 89 | }
 90 | 
 91 | bool tw_bloomfilter_test(const struct tw_bloomfilter *bf, const void *key,
 92 |                          size_t key_size)
 93 | {
 94 |   if (!bf || !key || !key_size) {
 95 |     return false;
 96 |   }
 97 | 
 98 |   const tw_uint128_t hash = tw_metrohash_128(TW_BF_DEFAULT_SEED, key, key_size);
 99 | 
100 |   const uint16_t k = bf->k;
101 |   const struct tw_bitmap *bitmap = bf->bitmap;
102 |   const uint64_t b_size = bitmap->size;
103 | 
104 |   for (size_t i = 0; i < k; ++i) {
105 |     const uint64_t hash_fn_i = hash.h + (i * hash.l);
106 |     const uint64_t idx = tw_projection_mul_64(hash_fn_i, b_size);
107 |     if (!tw_bitmap_test(bitmap, idx)) {
108 |       return false;
109 |     }
110 |   }
111 | 
112 |   return true;
113 | }
114 | 
115 | bool tw_bloomfilter_empty(const struct tw_bloomfilter *bf)
116 | {
117 |   if (!bf) {
118 |     return false;
119 |   }
120 | 
121 |   return tw_bitmap_empty(bf->bitmap);
122 | }
123 | 
124 | bool tw_bloomfilter_full(const struct tw_bloomfilter *bf)
125 | {
126 |   if (!bf) {
127 |     return false;
128 |   }
129 | 
130 |   return tw_bitmap_full(bf->bitmap);
131 | }
132 | 
133 | uint64_t tw_bloomfilter_count(const struct tw_bloomfilter *bf)
134 | {
135 |   if (!bf) {
136 |     return 0;
137 |   }
138 | 
139 |   return tw_bitmap_count(bf->bitmap);
140 | }
141 | 
142 | float tw_bloomfilter_density(const struct tw_bloomfilter *bf)
143 | {
144 |   if (!bf) {
145 |     return 0.0f;
146 |   }
147 | 
148 |   return tw_bitmap_density(bf->bitmap);
149 | }
150 | 
151 | struct tw_bloomfilter *tw_bloomfilter_zero(struct tw_bloomfilter *bf)
152 | {
153 |   if (!bf) {
154 |     return NULL;
155 |   }
156 | 
157 |   return (tw_bitmap_zero(bf->bitmap)) ? bf : NULL;
158 | }
159 | 
160 | struct tw_bloomfilter *tw_bloomfilter_fill(struct tw_bloomfilter *bf)
161 | {
162 |   if (!bf) {
163 |     return NULL;
164 |   }
165 | 
166 |   return (tw_bitmap_fill(bf->bitmap)) ? bf : NULL;
167 | }
168 | 
169 | struct tw_bloomfilter *tw_bloomfilter_not(struct tw_bloomfilter *bf)
170 | {
171 |   if (!bf) {
172 |     return NULL;
173 |   }
174 | 
175 |   return (tw_bitmap_not(bf->bitmap)) ? bf : NULL;
176 | }
177 | 
178 | bool tw_bloomfilter_equal(const struct tw_bloomfilter *a,
179 |                           const struct tw_bloomfilter *b)
180 | {
181 |   if (!a || !b) {
182 |     return false;
183 |   }
184 | 
185 |   return (a->k == b->k) && tw_bitmap_equal(a->bitmap, b->bitmap);
186 | }
187 | 
188 | struct tw_bloomfilter *tw_bloomfilter_union(const struct tw_bloomfilter *src,
189 |                                             struct tw_bloomfilter *dst)
190 | {
191 |   if (!src || !dst || src->k != dst->k) {
192 |     return false;
193 |   }
194 | 
195 |   return (tw_bitmap_union(src->bitmap, dst->bitmap)) ? dst : NULL;
196 | }
197 | 
198 | struct tw_bloomfilter *
199 | tw_bloomfilter_intersection(const struct tw_bloomfilter *src,
200 |                             struct tw_bloomfilter *dst)
201 | {
202 |   if (!src || !dst || src->k != dst->k) {
203 |     return false;
204 |   }
205 | 
206 |   return (tw_bitmap_intersection(src->bitmap, dst->bitmap)) ? dst : NULL;
207 | }
208 | 
209 | struct tw_bloomfilter *tw_bloomfilter_xor(const struct tw_bloomfilter *src,
210 |                                           struct tw_bloomfilter *dst)
211 | {
212 |   if (!src || !dst || src->k != dst->k) {
213 |     return false;
214 |   }
215 | 
216 |   return (tw_bitmap_xor(src->bitmap, dst->bitmap)) ? dst : NULL;
217 | }
218 | 


--------------------------------------------------------------------------------
/tests/check/check_print.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Check: a unit test framework for C
  3 |  * Copyright (C) 2001, 2002 Arien Malec
  4 |  *
  5 |  * This library is free software; you can redistribute it and/or
  6 |  * modify it under the terms of the GNU Lesser General Public
  7 |  * License as published by the Free Software Foundation; either
  8 |  * version 2.1 of the License, or (at your option) any later version.
  9 |  *
 10 |  * This library is distributed in the hope that it will be useful,
 11 |  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 12 |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 13 |  * Lesser General Public License for more details.
 14 |  *
 15 |  * You should have received a copy of the GNU Lesser General Public
 16 |  * License along with this library; if not, write to the
 17 |  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 18 |  * Boston, MA 02111-1307, USA.
 19 |  */
 20 | 
 21 | #include "libcompat.h"
 22 | 
 23 | #include <stdio.h>
 24 | #include <stdlib.h>
 25 | #include <string.h>
 26 | 
 27 | #include "check.h"
 28 | #include "check_impl.h"
 29 | #include "check_list.h"
 30 | #include "check_print.h"
 31 | #include "check_str.h"
 32 | 
 33 | static void srunner_fprint_summary(FILE *file, SRunner *sr,
 34 |                                    enum print_output print_mode);
 35 | static void srunner_fprint_results(FILE *file, SRunner *sr,
 36 |                                    enum print_output print_mode);
 37 | 
/* Print the runner's summary and results to stdout; thin wrapper around
 * srunner_fprint. */
void srunner_print(SRunner *sr, enum print_output print_mode)
{
  srunner_fprint(stdout, sr, print_mode);
}
 42 | 
 43 | void srunner_fprint(FILE *file, SRunner *sr, enum print_output print_mode)
 44 | {
 45 |   if (print_mode == CK_ENV) {
 46 |     print_mode = get_env_printmode();
 47 |   }
 48 | 
 49 |   srunner_fprint_summary(file, sr, print_mode);
 50 |   srunner_fprint_results(file, sr, print_mode);
 51 | }
 52 | 
 53 | static void srunner_fprint_summary(FILE *file, SRunner *sr,
 54 |                                    enum print_output print_mode)
 55 | {
 56 | #if 0
 57 |   if (print_mode == CK_SUBUNIT)
 58 |     return;
 59 | #endif
 60 | 
 61 |   if (print_mode >= CK_MINIMAL) {
 62 |     char *str;
 63 | 
 64 |     str = sr_stat_str(sr);
 65 |     fprintf(file, "%s\n", str);
 66 |     free(str);
 67 |   }
 68 |   return;
 69 | }
 70 | 
 71 | static void srunner_fprint_results(FILE *file, SRunner *sr,
 72 |                                    enum print_output print_mode)
 73 | {
 74 |   List *resultlst;
 75 | 
 76 | #if 0
 77 |   if (print_mode == CK_SUBUNIT)
 78 |     return;
 79 | #endif
 80 | 
 81 |   resultlst = sr->resultlst;
 82 | 
 83 |   for (check_list_front(resultlst); !check_list_at_end(resultlst);
 84 |        check_list_advance(resultlst)) {
 85 |     TestResult *tr = (TestResult *)check_list_val(resultlst);
 86 | 
 87 |     tr_fprint(file, tr, print_mode);
 88 |   }
 89 |   return;
 90 | }
 91 | 
 92 | void fprint_xml_esc(FILE *file, const char *str)
 93 | {
 94 |   for (; *str != '\0'; str++) {
 95 | 
 96 |     switch (*str) {
 97 | 
 98 |     /* handle special characters that must be escaped */
 99 |     case '"':
100 |       fputs("&quot;", file);
101 |       break;
102 |     case '\'':
103 |       fputs("&apos;", file);
104 |       break;
105 |     case '<':
106 |       fputs("&lt;", file);
107 |       break;
108 |     case '>':
109 |       fputs("&gt;", file);
110 |       break;
111 |     case '&':
112 |       fputs("&amp;", file);
113 |       break;
114 | 
115 |     /* regular characters, print as is */
116 |     default:
117 |       fputc(*str, file);
118 |       break;
119 |     }
120 |   }
121 | }
122 | 
123 | void tr_fprint(FILE *file, TestResult *tr, enum print_output print_mode)
124 | {
125 |   if (print_mode == CK_ENV) {
126 |     print_mode = get_env_printmode();
127 |   }
128 | 
129 |   if ((print_mode >= CK_VERBOSE && tr->rtype == CK_PASS) ||
130 |       (tr->rtype != CK_PASS && print_mode >= CK_NORMAL)) {
131 |     char *trstr = tr_str(tr);
132 | 
133 |     fprintf(file, "%s\n", trstr);
134 |     free(trstr);
135 |   }
136 | }
137 | 
138 | void tr_xmlprint(FILE *file, TestResult *tr,
139 |                  enum print_output print_mode CK_ATTRIBUTE_UNUSED)
140 | {
141 |   char result[10];
142 |   char *path_name = NULL;
143 |   char *file_name = NULL;
144 |   char *slash = NULL;
145 | 
146 |   switch (tr->rtype) {
147 |   case CK_PASS:
148 |     snprintf(result, sizeof(result), "%s", "success");
149 |     break;
150 |   case CK_FAILURE:
151 |     snprintf(result, sizeof(result), "%s", "failure");
152 |     break;
153 |   case CK_ERROR:
154 |     snprintf(result, sizeof(result), "%s", "error");
155 |     break;
156 |   case CK_TEST_RESULT_INVALID:
157 |   default:
158 |     abort();
159 |     break;
160 |   }
161 | 
162 |   if (tr->file) {
163 |     slash = strrchr(tr->file, '/');
164 |     if (slash == NULL) {
165 |       slash = strrchr(tr->file, '\\');
166 |     }
167 | 
168 |     if (slash == NULL) {
169 |       path_name = strdup(".");
170 |       file_name = tr->file;
171 |     } else {
172 |       path_name = strdup(tr->file);
173 |       path_name[slash - tr->file] = 0; /* Terminate the temporary string. */
174 |       file_name = slash + 1;
175 |     }
176 |   }
177 | 
178 |   fprintf(file, "    <test result=\"%s\">\n", result);
179 |   fprintf(file, "      <path>%s</path>\n",
180 |           (path_name == NULL ? "" : path_name));
181 |   fprintf(file, "      <fn>%s:%d</fn>\n", (file_name == NULL ? "" : file_name),
182 |           tr->line);
183 |   fprintf(file, "      <id>%s</id>\n", tr->tname);
184 |   fprintf(file, "      <iteration>%d</iteration>\n", tr->iter);
185 |   fprintf(file, "      <duration>%d.%06d</duration>\n",
186 |           tr->duration < 0 ? -1 : tr->duration / US_PER_SEC,
187 |           tr->duration < 0 ? 0 : tr->duration % US_PER_SEC);
188 |   fprintf(file, "      <description>");
189 |   fprint_xml_esc(file, tr->tcname);
190 |   fprintf(file, "</description>\n");
191 |   fprintf(file, "      <message>");
192 |   fprint_xml_esc(file, tr->msg);
193 |   fprintf(file, "</message>\n");
194 |   fprintf(file, "    </test>\n");
195 | 
196 |   free(path_name);
197 | }
198 | 
199 | enum print_output get_env_printmode(void)
200 | {
201 |   char *env = getenv("CK_VERBOSITY");
202 | 
203 |   if (env == NULL)
204 |     return CK_NORMAL;
205 |   if (strcmp(env, "silent") == 0)
206 |     return CK_SILENT;
207 |   if (strcmp(env, "minimal") == 0)
208 |     return CK_MINIMAL;
209 |   if (strcmp(env, "verbose") == 0)
210 |     return CK_VERBOSE;
211 |   return CK_NORMAL;
212 | }
213 | 


--------------------------------------------------------------------------------
/include/twiddle/hyperloglog/hyperloglog.h:
--------------------------------------------------------------------------------
  1 | #ifndef TWIDDLE_HYPERLOGLOG_H
  2 | #define TWIDDLE_HYPERLOGLOG_H
  3 | 
  4 | #include <math.h>
  5 | #include <stdbool.h>
  6 | #include <stdint.h>
  7 | 
/* Evaluates to `1.04 / sqrt(reg)` as a double; `reg` is presumably a number
 * of registers — confirm against callers. */
#define TW_HLL_ERROR_FOR_REG(reg) (1.04 / sqrt((double)(reg)))
/* Inverse of the formula above: `1.0816 / err^2`, where 1.0816 is 1.04^2. */
#define TW_HLL_REG_FOR_ERROR(err) (1.0816 / ((err) * (err)))

/* Inclusive bounds of the `precision` accepted by tw_hyperloglog_new. */
#define TW_HLL_MIN_PRECISION 6
#define TW_HLL_MAX_PRECISION 18
 13 | 
 14 | /**
 15 |  * hyperloglog data structure
 16 |  *
 17 |  * (source https://en.wikipedia.org/wiki/HyperLogLog)
 18 |  *
 19 |  * The basis of the HyperLogLog algorithm is the observation that the
 20 |  * cardinality of a multiset of uniformly distributed random numbers can be
 21 |  * estimated by calculating the maximum number of leading zeros in the binary
 22 |  * representation of each number in the set. If the maximum number of leading
 23 |  * zeros observed is `n`, an estimate for the number of
 24 |  * distinct elements in the set is `2^{n}`.
 25 |  *
 26 |  * In the HyperLogLog algorithm, a hash function is applied to each element in
 27 |  * the original multiset, to obtain a multiset of uniformly distributed random
 28 |  * numbers with the same cardinality as the original multiset. The cardinality
 29 |  * of this randomly distributed set can then be estimated using the algorithm
 30 |  * above.
 31 |  *
 32 |  * The simple estimate of cardinality obtained using the algorithm above has
 33 |  * the disadvantage of a large variance. In the HyperLogLog algorithm, the
 34 |  * variance is minimised by splitting the multiset into numerous subsets,
 35 |  * calculating the maximum number of leading zeros in the numbers in each of
 36 |  * these subsets, and using a harmonic mean to combine these estimates for each
 37 |  * subset into an estimate of the cardinality of the whole set.
 38 |  *
 * For small cardinality, the algorithm falls back to the linear counting
 * algorithm, see [1] for a detailed analysis. This implementation's estimator
 * uses the bias correction proposed in [2]. Referring to the original paper,
 * we're using `m` registers of fixed size 64 bits.
 43 |  *
 44 |  * [1] Flajolet, Philippe, et al. "Hyperloglog: the analysis of a near-optimal
 45 |  * cardinality estimation algorithm." DMTCS Proceedings 1 (2008).
 46 |  *
 47 |  * [2] Heule, Stefan, Marc Nunkesser, and Alexander Hall. "HyperLogLog in
 48 |  * practice: Algorithmic engineering of a state of the art cardinality
 49 |  * estimation algorithm." Proceedings of the 16th International Conference on
 50 |  * Extending Database Technology. ACM, 2013.
 51 |  */
 52 | struct tw_hyperloglog {
 53 |   /** the number of registers will be defined as 2^precision */
 54 |   uint8_t precision;
 55 |   /** allocated array containing the 8bit registers */
 56 |   uint8_t *registers;
 57 | };
 58 | 
 59 | /**
 60 |  * Allocate a `struct tw_hyperloglog`.
 61 |  *
 * @param precision power-of-2 exponent number of bucket hyperloglog should use,
 *                  must be greater than or equal to `TW_HLL_MIN_PRECISION` and
 *                  smaller than or equal to `TW_HLL_MAX_PRECISION`
 65 |  *
 66 |  * @return `NULL` if allocation failed, otherwise a pointer to the newly
 67 |  *         allocated `struct tw_hyperloglog`.
 68 |  *
 69 |  * @note group:hyperloglog
 70 |  */
 71 | struct tw_hyperloglog *tw_hyperloglog_new(uint8_t precision);
 72 | 
 73 | /**
 74 |  * Free a `struct tw_hyperloglog`.
 75 |  *
 76 |  * @param hll to free
 77 |  *
 78 |  * @note group:hyperloglog
 79 |  */
 80 | void tw_hyperloglog_free(struct tw_hyperloglog *hll);
 81 | 
 82 | /**
 83 |  * Copy a source `struct tw_hyperloglog` into a specified destination.
 84 |  *
 85 |  * @param src non-null hyperloglog to copy from
 86 |  * @param dst non-null hyperloglog of the same precision as src to copy to
 87 |  *
 88 |  * @return `NULL` if copy failed, otherwise a pointer to dst
 89 |  *
 90 |  * @note group:hyperloglog
 91 |  */
 92 | struct tw_hyperloglog *tw_hyperloglog_copy(const struct tw_hyperloglog *src,
 93 |                                            struct tw_hyperloglog *dst);
 94 | 
 95 | /**
 96 |  * Clone a `struct tw_hyperloglog` into a newly allocated one.
 97 |  *
 98 |  * @param hll non-null hyperloglog to clone
 99 |  *
100 |  * @return `NULL` if failed, otherwise a newly allocated hyperloglog initialized
101 |  *         from the requested hyperloglog. The caller is responsible to
102 |  *         deallocate with tw_hyperloglog_free
103 |  *
104 |  * @note group:hyperloglog
105 |  */
106 | struct tw_hyperloglog *tw_hyperloglog_clone(const struct tw_hyperloglog *hll);
107 | 
108 | /**
109 |  * Add an element in a `struct tw_hyperloglog`.
110 |  *
111 |  * @param hll non-null hyperloglog to add the element to
112 |  * @param key non-null buffer of the key to add
113 |  * @param key_size positive integer size of the key to add
114 |  *
115 |  * @note group:hyperloglog
116 |  */
117 | void tw_hyperloglog_add(struct tw_hyperloglog *hll, const void *key,
118 |                         size_t key_size);
119 | 
120 | /**
121 |  * Estimate the number of elements in a `struct tw_hyperloglog`.
122 |  *
123 |  * @param hll non-null hyperloglog to estimate
124 |  *
125 |  * @return `0.0` if hll is NULL, otherwise the estimated number of elements
126 |  *         in hll.
127 |  *
128 |  * @note group:hyperloglog
129 |  */
130 | double tw_hyperloglog_count(const struct tw_hyperloglog *hll);
131 | 
132 | /**
133 |  * Verify if `struct tw_hyperloglog`s are equal.
134 |  *
135 |  * @param fst non-null first hyperloglog to check
136 |  * @param snd non-null second hyperloglog to check
137 |  *
138 |  * @return `false` if any is null or not of the same precision, otherwise an
139 |  *         indicator if `fst` and `snd` are equal
140 |  *
141 |  * @note group:hyperloglog
142 |  */
143 | bool tw_hyperloglog_equal(const struct tw_hyperloglog *fst,
144 |                           const struct tw_hyperloglog *snd);
145 | 
146 | /**
147 |  * Merge a `struct tw_hyperloglog` in a specified destination.
148 |  *
149 |  * The merge operation is an elemwise max applied to the buckets.
150 |  *
151 |  * @param src non-null hyperloglog to merge from
152 |  * @param dst non-null hyperloglog to merge to
153 |  *
154 |  * @return `NULL` if any is null or not of the same precision, otherwise a
155 |  *         pointer to merged `dst'
156 |  *
157 |  * @note group:hyperloglog
158 |  */
159 | struct tw_hyperloglog *tw_hyperloglog_merge(const struct tw_hyperloglog *src,
160 |                                             struct tw_hyperloglog *dst);
161 | #endif /* TWIDDLE_HYPERLOGLOG_H */
162 | 


--------------------------------------------------------------------------------
/tests/test-minhash.c:
--------------------------------------------------------------------------------
  1 | #include <math.h>
  2 | #include <stdbool.h>
  3 | #include <stdlib.h>
  4 | 
  5 | #include <twiddle/hash/minhash.h>
  6 | 
  7 | #include "../src/twiddle/macrology.h"
  8 | #include "test.h"
  9 | 
 10 | static bool estimate_in_bounds(uint32_t n, float jaccard,
 11 |                                float jaccard_estimate)
 12 | {
 13 |   /* This is solely used to make test pass with reasonnable bounds */
 14 |   const float epsilon = 0.25;
 15 |   return fabs(jaccard - jaccard_estimate) < (1 + epsilon) * (1 / sqrt(n));
 16 | }
 17 | 
 18 | START_TEST(test_minhash_basic)
 19 | {
 20 |   DESCRIBE_TEST;
 21 | 
 22 |   const uint32_t sizes[] = {256, 512, 1024, 2048, 4096, 1 << 13};
 23 | 
 24 |   const uint8_t sample = 4;
 25 | 
 26 |   for (size_t i = 0; i < TW_ARRAY_SIZE(sizes); ++i) {
 27 |     const uint32_t n_registers = sizes[i];
 28 |     struct tw_minhash *a = tw_minhash_new(n_registers);
 29 |     struct tw_minhash *b = tw_minhash_new(n_registers);
 30 | 
 31 |     const uint32_t n_items = n_registers * 4;
 32 |     uint32_t intersection = 0;
 33 |     for (size_t j = 0; j < n_items; ++j) {
 34 |       const size_t key_size = sizeof(j);
 35 |       const void *key = (void *)&j;
 36 | 
 37 |       tw_minhash_add(a, key, key_size);
 38 |       if (j % sample == 0) {
 39 |         tw_minhash_add(b, key, key_size);
 40 |         intersection++;
 41 |       }
 42 |     }
 43 | 
 44 |     const float jaccard = (float)intersection / (float)n_items;
 45 |     const float jaccard_estimate = tw_minhash_estimate(a, b);
 46 |     ck_assert(estimate_in_bounds(n_registers, jaccard, jaccard_estimate));
 47 | 
 48 |     tw_minhash_free(b);
 49 |     tw_minhash_free(a);
 50 |   }
 51 | }
 52 | END_TEST
 53 | 
 54 | START_TEST(test_minhash_copy_and_clone)
 55 | {
 56 |   DESCRIBE_TEST;
 57 | 
 58 |   const uint32_t sizes[] = {256, 512, 1024, 2048, 4096, 1 << 13};
 59 | 
 60 |   for (size_t i = 0; i < TW_ARRAY_SIZE(sizes); ++i) {
 61 |     const uint32_t n_registers = sizes[i];
 62 |     struct tw_minhash *a = tw_minhash_new(n_registers);
 63 |     struct tw_minhash *b = tw_minhash_clone(a);
 64 | 
 65 |     ck_assert(tw_minhash_equal(a, b));
 66 | 
 67 |     const uint32_t n_items = n_registers / 2;
 68 |     for (size_t j = 0; j < n_items; ++j) {
 69 |       const size_t key_size = sizeof(j);
 70 |       const void *key = (void *)&j;
 71 |       tw_minhash_add(a, key, key_size);
 72 |     }
 73 | 
 74 |     ck_assert(!tw_minhash_equal(a, b));
 75 | 
 76 |     for (size_t j = 0; j < n_items; ++j) {
 77 |       const size_t key_size = sizeof(j);
 78 |       const void *key = (void *)&j;
 79 |       tw_minhash_add(b, key, key_size);
 80 |     }
 81 | 
 82 |     ck_assert(tw_minhash_equal(a, b));
 83 | 
 84 |     struct tw_minhash *c = tw_minhash_new(n_registers);
 85 | 
 86 |     ck_assert(!tw_minhash_equal(a, c));
 87 |     ck_assert_ptr_ne(tw_minhash_copy(b, c), NULL);
 88 |     ck_assert(tw_minhash_equal(a, c));
 89 | 
 90 |     tw_minhash_free(c);
 91 |     tw_minhash_free(b);
 92 |     tw_minhash_free(a);
 93 |   }
 94 | }
 95 | END_TEST
 96 | 
 97 | START_TEST(test_minhash_merge)
 98 | {
 99 |   DESCRIBE_TEST;
100 | 
101 |   const uint32_t sizes[] = {16,  32,   64,   128,  256,
102 |                             512, 1024, 2048, 4096, 1 << 13};
103 | 
104 |   for (size_t i = 0; i < TW_ARRAY_SIZE(sizes); ++i) {
105 |     const uint32_t n_registers = sizes[i];
106 |     struct tw_minhash *a = tw_minhash_new(n_registers);
107 |     struct tw_minhash *b = tw_minhash_new(n_registers);
108 |     struct tw_minhash *f = tw_minhash_new(n_registers);
109 | 
110 |     const uint32_t n_items = n_registers * 4;
111 |     for (size_t j = 0; j < n_items; ++j) {
112 |       const size_t key_size = sizeof(j);
113 |       const void *key = (void *)&j;
114 |       tw_minhash_add(((j % 2) ? a : b), key, key_size);
115 |       tw_minhash_add(f, key, key_size);
116 |     }
117 | 
118 |     ck_assert(estimate_in_bounds(n_registers, 0, tw_minhash_estimate(a, b)));
119 |     ck_assert(estimate_in_bounds(n_registers, 0.5, tw_minhash_estimate(a, f)));
120 |     ck_assert(estimate_in_bounds(n_registers, 0.5, tw_minhash_estimate(b, f)));
121 | 
122 |     struct tw_minhash *u = tw_minhash_clone(a);
123 |     ck_assert_ptr_ne(tw_minhash_merge(b, u), NULL);
124 | 
125 |     for (size_t j = 0; j < n_registers; j++) {
126 |       bool same_value =
127 |           tw_max(a->registers[j], b->registers[j]) == u->registers[j];
128 |       ck_assert(same_value);
129 |     }
130 | 
131 |     ck_assert(estimate_in_bounds(n_registers, 0.5, tw_minhash_estimate(a, u)));
132 |     ck_assert(estimate_in_bounds(n_registers, 0.5, tw_minhash_estimate(b, u)));
133 |     ck_assert(estimate_in_bounds(n_registers, 1.0, tw_minhash_estimate(f, u)));
134 | 
135 |     tw_minhash_free(u);
136 |     tw_minhash_free(f);
137 |     tw_minhash_free(b);
138 |     tw_minhash_free(a);
139 |   }
140 | }
141 | END_TEST
142 | 
143 | START_TEST(test_minhash_errors)
144 | {
145 |   DESCRIBE_TEST;
146 | 
147 |   const uint32_t a_size = 1 << 16, b_size = (1 << 16) + 1;
148 | 
149 |   struct tw_minhash *a = tw_minhash_new(a_size);
150 |   struct tw_minhash *b = tw_minhash_new(b_size);
151 | 
152 |   ck_assert_ptr_eq(tw_minhash_new(0), NULL);
153 | 
154 |   ck_assert_ptr_eq(tw_minhash_copy(a, b), NULL);
155 |   ck_assert_ptr_eq(tw_minhash_copy(a, NULL), NULL);
156 |   ck_assert_ptr_eq(tw_minhash_copy(NULL, a), NULL);
157 |   ck_assert_ptr_eq(tw_minhash_clone(NULL), NULL);
158 | 
159 |   tw_minhash_add(NULL, NULL, 0);
160 |   tw_minhash_add(a, NULL, 1);
161 |   tw_minhash_add(a, &a_size, 0);
162 |   tw_minhash_add(a, &a_size, 1);
163 | 
164 |   tw_minhash_estimate(a, b);
165 |   tw_minhash_estimate(a, NULL);
166 |   tw_minhash_estimate(NULL, NULL);
167 | 
168 |   ck_assert(!tw_minhash_equal(a, b));
169 |   ck_assert(!tw_minhash_equal(NULL, b));
170 |   ck_assert(!tw_minhash_equal(a, NULL));
171 | 
172 |   ck_assert_ptr_eq(tw_minhash_merge(a, b), NULL);
173 |   ck_assert_ptr_eq(tw_minhash_merge(a, NULL), NULL);
174 |   ck_assert_ptr_eq(tw_minhash_merge(NULL, b), NULL);
175 | 
176 |   tw_minhash_free(NULL);
177 |   tw_minhash_free(b);
178 |   tw_minhash_free(a);
179 | }
180 | END_TEST
181 | 
182 | int run_tests()
183 | {
184 |   int number_failed;
185 | 
186 |   Suite *s = suite_create("minhash");
187 |   SRunner *runner = srunner_create(s);
188 |   TCase *tc = tcase_create("basic");
189 |   tcase_add_test(tc, test_minhash_basic);
190 |   tcase_add_test(tc, test_minhash_copy_and_clone);
191 |   tcase_add_test(tc, test_minhash_merge);
192 |   tcase_add_test(tc, test_minhash_errors);
193 |   /* added for travis slowness of clang */
194 |   tcase_set_timeout(tc, 15);
195 |   suite_add_tcase(s, tc);
196 |   srunner_run_all(runner, CK_NORMAL);
197 |   number_failed = srunner_ntests_failed(runner);
198 |   srunner_free(runner);
199 | 
200 |   return number_failed;
201 | }
202 | 
203 | int main() { return (run_tests() == 0) ? EXIT_SUCCESS : EXIT_FAILURE; }
204 | 


--------------------------------------------------------------------------------
/src/twiddle/bloomfilter/bloomfilter_a2.c:
--------------------------------------------------------------------------------
  1 | #include <twiddle/bitmap/bitmap.h>
  2 | #include <twiddle/bloomfilter/bloomfilter.h>
  3 | #include <twiddle/bloomfilter/bloomfilter_a2.h>
  4 | 
  5 | #include "../macrology.h"
  6 | 
  7 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_new(uint64_t size, uint16_t k,
  8 |                                                 float density)
  9 | {
 10 |   if ((!size || size > TW_BITMAP_MAX_BITS) || !k ||
 11 |       (density <= 0.0 || density > 1.0)) {
 12 |     return NULL;
 13 |   }
 14 | 
 15 |   struct tw_bloomfilter_a2 *bf = calloc(1, sizeof(struct tw_bloomfilter_a2));
 16 | 
 17 |   struct tw_bloomfilter *active = tw_bloomfilter_new(size, k);
 18 |   if (!active) {
 19 |     free(bf);
 20 |     return NULL;
 21 |   }
 22 | 
 23 |   struct tw_bloomfilter *passive = tw_bloomfilter_new(size, k);
 24 |   if (!passive) {
 25 |     free(bf);
 26 |     free(active);
 27 |     return NULL;
 28 |   }
 29 | 
 30 |   bf->density = density;
 31 |   bf->active = active;
 32 |   bf->passive = passive;
 33 | 
 34 |   return bf;
 35 | }
 36 | 
 37 | struct tw_bloomfilter_a2 *
 38 | tw_bloomfilter_a2_copy(const struct tw_bloomfilter_a2 *src,
 39 |                        struct tw_bloomfilter_a2 *dst)
 40 | {
 41 |   if (!src || !dst) {
 42 |     return NULL;
 43 |   }
 44 | 
 45 |   if (!tw_bloomfilter_copy(src->active, dst->active) ||
 46 |       !tw_bloomfilter_copy(src->passive, dst->passive)) {
 47 |     return NULL;
 48 |   }
 49 | 
 50 |   return dst;
 51 | }
 52 | 
 53 | struct tw_bloomfilter_a2 *
 54 | tw_bloomfilter_a2_clone(const struct tw_bloomfilter_a2 *bf)
 55 | {
 56 |   if (!bf) {
 57 |     return NULL;
 58 |   }
 59 | 
 60 |   struct tw_bloomfilter_a2 *new = tw_bloomfilter_a2_new(
 61 |       bf->active->bitmap->size, bf->active->k, bf->density);
 62 |   if (!new) {
 63 |     return NULL;
 64 |   }
 65 | 
 66 |   return tw_bloomfilter_a2_copy(bf, new);
 67 | }
 68 | 
 69 | void tw_bloomfilter_a2_free(struct tw_bloomfilter_a2 *bf)
 70 | {
 71 |   if (!bf) {
 72 |     return;
 73 |   }
 74 | 
 75 |   tw_bloomfilter_free(bf->active);
 76 |   tw_bloomfilter_free(bf->passive);
 77 |   free(bf);
 78 | }
 79 | 
 80 | static inline bool tw_bloomfilter_a2_rotate_(struct tw_bloomfilter_a2 *bf)
 81 | {
 82 |   if (tw_unlikely(tw_bloomfilter_density(bf->active) >= bf->density)) {
 83 |     struct tw_bloomfilter *tmp = bf->passive;
 84 |     bf->passive = bf->active;
 85 |     bf->active = tmp;
 86 |     tw_bloomfilter_zero(tmp);
 87 |     return true;
 88 |   }
 89 | 
 90 |   return false;
 91 | }
 92 | 
 93 | void tw_bloomfilter_a2_set(struct tw_bloomfilter_a2 *bf, const void *key,
 94 |                            size_t key_size)
 95 | {
 96 |   if (!bf || !key || !key_size) {
 97 |     return;
 98 |   }
 99 | 
100 |   tw_bloomfilter_a2_rotate_(bf);
101 | 
102 |   tw_bloomfilter_set(bf->active, key, key_size);
103 | }
104 | 
105 | bool tw_bloomfilter_a2_test(const struct tw_bloomfilter_a2 *bf, const void *key,
106 |                             size_t key_size)
107 | {
108 |   if (!bf || !key || !key_size) {
109 |     return false;
110 |   }
111 | 
112 |   return tw_bloomfilter_test(bf->active, key, key_size) ||
113 |          tw_bloomfilter_test(bf->passive, key, key_size);
114 | }
115 | 
116 | bool tw_bloomfilter_a2_empty(const struct tw_bloomfilter_a2 *bf)
117 | {
118 |   if (!bf) {
119 |     return false;
120 |   }
121 | 
122 |   return tw_bloomfilter_empty(bf->active) && tw_bloomfilter_empty(bf->passive);
123 | }
124 | 
125 | bool tw_bloomfilter_a2_full(const struct tw_bloomfilter_a2 *bf)
126 | {
127 |   if (!bf) {
128 |     return false;
129 |   }
130 | 
131 |   return tw_bloomfilter_full(bf->active) && tw_bloomfilter_full(bf->passive);
132 | }
133 | 
134 | uint64_t tw_bloomfilter_a2_count(const struct tw_bloomfilter_a2 *bf)
135 | {
136 |   if (!bf) {
137 |     return 0;
138 |   }
139 | 
140 |   return tw_bloomfilter_count(bf->active) + tw_bloomfilter_count(bf->passive);
141 | }
142 | 
143 | float tw_bloomfilter_a2_density(const struct tw_bloomfilter_a2 *bf)
144 | {
145 |   if (!bf) {
146 |     return 0.0;
147 |   }
148 | 
149 |   return (tw_bloomfilter_density(bf->active) +
150 |           tw_bloomfilter_density(bf->passive)) /
151 |          2.0;
152 | }
153 | 
154 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_zero(struct tw_bloomfilter_a2 *bf)
155 | {
156 |   if (!bf) {
157 |     return NULL;
158 |   }
159 | 
160 |   return (tw_bloomfilter_zero(bf->active) && tw_bloomfilter_zero(bf->passive))
161 |              ? bf
162 |              : NULL;
163 | }
164 | 
165 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_fill(struct tw_bloomfilter_a2 *bf)
166 | {
167 |   if (!bf) {
168 |     return NULL;
169 |   }
170 | 
171 |   return (tw_bloomfilter_fill(bf->active) && tw_bloomfilter_fill(bf->passive))
172 |              ? bf
173 |              : NULL;
174 | }
175 | 
176 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_not(struct tw_bloomfilter_a2 *bf)
177 | {
178 |   if (!bf) {
179 |     return NULL;
180 |   }
181 | 
182 |   return (tw_bloomfilter_not(bf->active) && tw_bloomfilter_not(bf->passive))
183 |              ? bf
184 |              : NULL;
185 | }
186 | 
187 | bool tw_bloomfilter_a2_equal(const struct tw_bloomfilter_a2 *fst,
188 |                              const struct tw_bloomfilter_a2 *snd)
189 | {
190 |   if (!fst || !snd) {
191 |     return false;
192 |   }
193 | 
194 |   return (tw_almost_equal(fst->density, snd->density) &&
195 |           tw_bloomfilter_equal(fst->active, snd->active) &&
196 |           tw_bloomfilter_equal(fst->passive, snd->passive));
197 | }
198 | 
199 | struct tw_bloomfilter_a2 *
200 | tw_bloomfilter_a2_union(const struct tw_bloomfilter_a2 *src,
201 |                         struct tw_bloomfilter_a2 *dst)
202 | {
203 |   if (!src || !dst) {
204 |     return NULL;
205 |   }
206 | 
207 |   if (!tw_almost_equal(src->density, dst->density)) {
208 |     return NULL;
209 |   }
210 | 
211 |   return (tw_bloomfilter_union(src->active, dst->active) &&
212 |           tw_bloomfilter_union(src->passive, dst->passive))
213 |              ? dst
214 |              : NULL;
215 | }
216 | 
217 | struct tw_bloomfilter_a2 *
218 | tw_bloomfilter_a2_intersection(const struct tw_bloomfilter_a2 *src,
219 |                                struct tw_bloomfilter_a2 *dst)
220 | {
221 |   if (!src || !dst) {
222 |     return NULL;
223 |   }
224 | 
225 |   if (!tw_almost_equal(src->density, dst->density)) {
226 |     return NULL;
227 |   }
228 | 
229 |   return (tw_bloomfilter_intersection(src->active, dst->active) &&
230 |           tw_bloomfilter_intersection(src->passive, dst->passive))
231 |              ? dst
232 |              : NULL;
233 | }
234 | 
235 | struct tw_bloomfilter_a2 *
236 | tw_bloomfilter_a2_xor(const struct tw_bloomfilter_a2 *src,
237 |                       struct tw_bloomfilter_a2 *dst)
238 | {
239 |   if (!src || !dst) {
240 |     return NULL;
241 |   }
242 | 
243 |   if (!tw_almost_equal(src->density, dst->density)) {
244 |     return NULL;
245 |   }
246 | 
247 |   return (tw_bloomfilter_xor(src->active, dst->active) &&
248 |           tw_bloomfilter_xor(src->passive, dst->passive))
249 |              ? dst
250 |              : NULL;
251 | }
252 | 


--------------------------------------------------------------------------------
/src/twiddle/hyperloglog/hyperloglog.c:
--------------------------------------------------------------------------------
  1 | #include <assert.h>
  2 | #include <math.h>
  3 | #include <string.h>
  4 | #include <x86intrin.h>
  5 | 
  6 | #include <twiddle/hyperloglog/hyperloglog.h>
  7 | #include <twiddle/utils/hash.h>
  8 | 
  9 | #include "../macrology.h"
 10 | #include "hyperloglog_simd.c"
 11 | 
 12 | #define TW_BYTES_PER_HLL_REGISTER sizeof(uint8_t)
 13 | #define TW_BITS_PER_HLL_REGISTER (TW_BYTES_PER_HLL * TW_BITS_IN_WORD)
 14 | 
 15 | #define TW_HLL_DEFAULT_SEED 646086642ULL
 16 | 
 17 | static_assert(TW_HLL_MIN_PRECISION >= 6,
 18 |               "precision must be at least one cacheline");
 19 | 
 20 | static_assert(TW_HLL_MAX_PRECISION < 64,
 21 |               "precision must be smaller than 64 for defined bit shifts");
 22 | 
 23 | struct tw_hyperloglog *tw_hyperloglog_new(uint8_t precision)
 24 | {
 25 |   if (precision < TW_HLL_MIN_PRECISION || precision > TW_HLL_MAX_PRECISION) {
 26 |     return NULL;
 27 |   }
 28 | 
 29 |   struct tw_hyperloglog *hll = calloc(1, sizeof(struct tw_hyperloglog));
 30 |   if (!hll) {
 31 |     return NULL;
 32 |   }
 33 | 
 34 |   size_t alloc_size = TW_ALLOC_TO_CACHELINE(1 << precision) * sizeof(uint8_t);
 35 | 
 36 |   if ((hll->registers = malloc_aligned(TW_CACHELINE, alloc_size)) == NULL) {
 37 |     free(hll);
 38 |     return NULL;
 39 |   }
 40 | 
 41 |   memset(hll->registers, 0, alloc_size);
 42 |   hll->precision = precision;
 43 | 
 44 |   return hll;
 45 | }
 46 | 
 47 | void tw_hyperloglog_free(struct tw_hyperloglog *hll)
 48 | {
 49 |   if (!hll) {
 50 |     return;
 51 |   }
 52 | 
 53 |   free(hll->registers);
 54 |   free(hll);
 55 | }
 56 | 
 57 | struct tw_hyperloglog *tw_hyperloglog_copy(const struct tw_hyperloglog *src,
 58 |                                            struct tw_hyperloglog *dst)
 59 | {
 60 |   if (!src || !dst || src->precision != dst->precision) {
 61 |     return NULL;
 62 |   }
 63 | 
 64 |   const uint8_t precision = src->precision;
 65 |   const uint32_t n_registers = 1 << precision;
 66 | 
 67 |   dst->precision = precision;
 68 |   memcpy(dst->registers, src->registers,
 69 |          n_registers * TW_BYTES_PER_HLL_REGISTER);
 70 | 
 71 |   return dst;
 72 | }
 73 | 
 74 | struct tw_hyperloglog *tw_hyperloglog_clone(const struct tw_hyperloglog *src)
 75 | {
 76 |   if (!src) {
 77 |     return NULL;
 78 |   }
 79 | 
 80 |   struct tw_hyperloglog *dst = tw_hyperloglog_new(src->precision);
 81 |   if (dst == NULL) {
 82 |     return NULL;
 83 |   }
 84 | 
 85 |   return tw_hyperloglog_copy(src, dst);
 86 | }
 87 | 
 88 | void tw_hyperloglog_add(struct tw_hyperloglog *hll, const void *key,
 89 |                         size_t key_size)
 90 | {
 91 |   if (!hll || !key || !key_size) {
 92 |     return;
 93 |   }
 94 | 
 95 |   const tw_uint128_t hash =
 96 |       tw_metrohash_128(TW_HLL_DEFAULT_SEED, key, key_size);
 97 |   const uint8_t precision = hll->precision;
 98 | 
 99 |   const uint32_t register_idx = hash.l >> (64 - precision);
100 | 
101 |   const uint8_t leading_zeros = __builtin_clzll(hash.h) + 1;
102 |   const uint8_t cur_leading_zeros = hll->registers[register_idx];
103 |   hll->registers[register_idx] = tw_max(leading_zeros, cur_leading_zeros);
104 | }
105 | 
/*
 * Turns the precision, the number of zero registers and the inverse sum
 * gathered by a counting kernel into the cardinality returned by
 * tw_hyperloglog_count. Defined elsewhere.
 * NOTE(review): presumably in hyperloglog_simd.c, included above - confirm.
 */
extern double estimate(uint8_t precision, uint32_t n_zeros, float inverse_sum);

/*
 * Exactly one counting kernel is declared, selected at compile time by
 * USE_AVX2, then USE_AVX, with a portable fallback. Each receives the
 * registers and their number and reports through `inverse_sum` and
 * `n_zeros`.
 */
#ifdef USE_AVX2
extern void hyperloglog_count_avx2(const uint8_t *registers,
                                   uint32_t n_registers, float *inverse_sum,
                                   uint32_t *n_zeros);
#elif defined USE_AVX
extern void hyperloglog_count_avx(const uint8_t *registers,
                                  uint32_t n_registers, float *inverse_sum,
                                  uint32_t *n_zeros);
#else
extern void hyperloglog_count_port(const uint8_t *registers,
                                   uint32_t n_registers, float *inverse_sum,
                                   uint32_t *n_zeros);
#endif
121 | 
122 | double tw_hyperloglog_count(const struct tw_hyperloglog *hll)
123 | {
124 |   if (!hll) {
125 |     return 0.0;
126 |   }
127 | 
128 |   const uint8_t precision = hll->precision;
129 |   const uint32_t n_registers = 1 << precision;
130 |   uint32_t n_zeros = 0;
131 |   float inverse_sum = 0.0;
132 | 
133 | #ifdef USE_AVX2
134 |   hyperloglog_count_avx2(hll->registers, n_registers, &inverse_sum, &n_zeros);
135 | #elif defined USE_AVX
136 |   hyperloglog_count_avx(hll->registers, n_registers, &inverse_sum, &n_zeros);
137 | #else
138 |   hyperloglog_count_port(hll->registers, n_registers, &inverse_sum, &n_zeros);
139 | #endif
140 | 
141 |   return estimate(precision, n_zeros, inverse_sum);
142 | }
143 | 
/*
 * Tell whether two hyperloglogs have the same precision and the same
 * register values.
 *
 * Returns false if any argument is NULL or if the precisions differ.
 */
bool tw_hyperloglog_equal(const struct tw_hyperloglog *fst,
                          const struct tw_hyperloglog *snd)
{
  if (!fst || !snd) {
    return false;
  }

  const uint8_t precision = fst->precision;

  if (precision != snd->precision) {
    return false;
  }

  const uint32_t n_registers = 1 << precision;

/*
 * Compare the registers sizeof(simd_t) bytes at a time and return false on
 * the first differing vector.
 * NOTE(review): simd_equal (tw_mm256_equal / tw_mm_equal) is defined
 * elsewhere; presumably true when every byte of both vectors matches -
 * confirm.
 */
#define HLL_EQ_LOOP(simd_t, simd_load, simd_equal)                             \
  for (size_t i = 0; i < n_registers / (sizeof(simd_t)); ++i) {                \
    simd_t *fst_addr = (simd_t *)fst->registers + i,                           \
           *snd_addr = (simd_t *)snd->registers + i;                           \
    if (!simd_equal(simd_load(fst_addr), simd_load(snd_addr))) {               \
      return false;                                                            \
    }                                                                          \
  }

/* AVX512 does not have movemask_epi8 equivalent, fallback to AVX2 */
#ifdef USE_AVX2
  HLL_EQ_LOOP(__m256i, _mm256_load_si256, tw_mm256_equal)
#elif defined USE_AVX
  HLL_EQ_LOOP(__m128i, _mm_load_si128, tw_mm_equal)
#else
  /* portable path: compare one register at a time */
  for (size_t i = 0; i < n_registers; ++i) {
    if (fst->registers[i] != snd->registers[i]) {
      return false;
    }
  }
#endif

#undef HLL_EQ_LOOP

  /* no difference found */
  return true;
}
185 | 
/*
 * Merge `src` into `dst`: every register of `dst` becomes the max of the
 * matching registers of `src` and `dst`.
 *
 * Returns NULL if any argument is NULL or if the precisions differ,
 * otherwise `dst`.
 */
struct tw_hyperloglog *tw_hyperloglog_merge(const struct tw_hyperloglog *src,
                                            struct tw_hyperloglog *dst)
{
  if (!src || !dst || src->precision != dst->precision) {
    return NULL;
  }

  const uint8_t precision = src->precision;
  const uint32_t n_registers = 1 << precision;

/*
 * Process sizeof(simd_t) registers per iteration: load one vector from each
 * side, take the max with simd_max and store the result back into dst.
 */
#define HLL_MAX_LOOP(simd_t, simd_load, simd_max, simd_store)                  \
  for (size_t i = 0; i < n_registers / sizeof(simd_t); ++i) {                  \
    simd_t *src_vec = (simd_t *)src->registers + i,                            \
           *dst_vec = (simd_t *)dst->registers + i;                            \
    const simd_t res = simd_max(simd_load(src_vec), simd_load(dst_vec));       \
    simd_store(dst_vec, res);                                                  \
  }

/* widest available instruction set first, portable loop as fallback */
#ifdef USE_AVX512
  HLL_MAX_LOOP(__m512i, _mm512_load_si512, _mm512_max_epu8, _mm512_store_si512)
#elif defined USE_AVX2
  HLL_MAX_LOOP(__m256i, _mm256_load_si256, _mm256_max_epu8, _mm256_store_si256)
#elif defined USE_AVX
  HLL_MAX_LOOP(__m128i, _mm_load_si128, _mm_max_epu8, _mm_store_si128)
#else
  for (size_t i = 0; i < n_registers; ++i) {
    dst->registers[i] = tw_max(src->registers[i], dst->registers[i]);
  }
#endif

#undef HLL_MAX_LOOP

  return dst;
}
220 | 


--------------------------------------------------------------------------------
/include/twiddle/bloomfilter/bloomfilter.h:
--------------------------------------------------------------------------------
  1 | #ifndef TWIDDLE_BLOOMFILTER_H
  2 | #define TWIDDLE_BLOOMFILTER_H
  3 | 
  4 | #include <math.h>
  5 | #include <stdbool.h>
  6 | #include <stddef.h>
  7 | #include <stdint.h>
  8 | 
  9 | #define TW_LOG_2 0.6931471805599453
 10 | 
 11 | #define tw_bloomfilter_optimal_m(n, p) (-n * log(p) / (TW_LOG_2 * TW_LOG_2))
 12 | #define tw_bloomfilter_optimal_k(n, m) (m / n * TW_LOG_2)
 13 | 
 14 | struct tw_bitmap;
 15 | 
/**
 * bloomfilter data structure
 *
 * This bloomfilter is static and does not support automatic resizing. The
 * underlying storage is a `struct tw_bitmap`.
 */
struct tw_bloomfilter {
  /** number of hash functions */
  uint16_t k;
  /** bitmap holding the bits */
  struct tw_bitmap *bitmap;
};
 28 | 
 29 | /**
 30 |  * Allocate a `struct tw_bloomfilter`.
 31 |  *
 32 |  * @param size number of bits the bloomfilter should hold, between
 33 |  *             (0, TW_BITMAP_MAX_BITS].
 34 |  * @param k strictly positive number of hash functions used
 35 |  *
 36 |  * @return `NULL` if allocation failed, otherwise a pointer to the newly
 37 |  *         allocated `struct tw_bloomfilter`
 38 |  *
 39 |  * @note group:bloomfilter
 40 |  */
 41 | struct tw_bloomfilter *tw_bloomfilter_new(uint64_t size, uint16_t k);
 42 | 
 43 | /**
 44 |  * Free a `struct tw_bloomfilter`.
 45 |  *
 46 |  * @param bf bloomfilter to free
 47 |  *
 48 |  * @note group:bloomfilter
 49 |  */
 50 | void tw_bloomfilter_free(struct tw_bloomfilter *bf);
 51 | 
 52 | /**
 53 |  * Copy a source `struct tw_bloomfilter` into a specified destination.
 54 |  *
 55 |  * @param src non-null bloomfilter to copy from
 56 |  * @param dst non-null bloomfilter to copy to
 57 |  *
 58 |  * @return `NULL` if any filter is null or not of the same cardinality,
 59 |  *         otherwise a pointer to dst
 60 |  *
 61 |  * @note group:bloomfilter
 62 |  */
 63 | struct tw_bloomfilter *tw_bloomfilter_copy(const struct tw_bloomfilter *src,
 64 |                                            struct tw_bloomfilter *dst);
 65 | 
 66 | /**
 67 |  * Clone a `struct tw_bloomfilter` into a newly allocated one.
 68 |  *
 69 |  * @param bf non-null bloomfilter to clone
 70 |  *
 71 |  * @return `NULL` if failed, otherwise a newly allocated bloomfilter initialized
 72 |  *         from the requested bloomfilter. The caller is responsible to
 73 |  *         deallocate with tw_bloomfilter_free
 74 |  *
 75 |  * @note group:bloomfilter
 76 |  */
 77 | struct tw_bloomfilter *tw_bloomfilter_clone(const struct tw_bloomfilter *bf);
 78 | 
 79 | /**
 80 |  * Set an element in a `struct tw_bloomfilter`.
 81 |  *
 82 |  * @param bf non-null bloomfilter affected
 83 |  * @param key non-null buffer of the key to add
 84 |  * @param key_size strictly positive size of the buffer key to add
 85 |  *
 86 |  * @note group:bloomfilter
 87 |  */
 88 | void tw_bloomfilter_set(struct tw_bloomfilter *bf, const void *key,
 89 |                         size_t key_size);
 90 | 
 91 | /**
 92 |  * Verify if an element is present in a `struct tw_bloomfilter`.
 93 |  *
 94 |  * @param bf non-null bloomfilter affected
 95 |  * @param key non-null buffer of the key to test
 96 |  * @param key_size strictly positive size of the buffer key to test
 97 |  *
 98 |  * @return `false` if preconditions are not met, otherwise indicator if the
 99 |  *         element is in the bloomfilter (with possibility of false positives)
100 |  *
101 |  * @note group:bloomfilter
102 |  */
103 | bool tw_bloomfilter_test(const struct tw_bloomfilter *bf, const void *key,
104 |                          size_t key_size);
105 | /**
106 |  * Verify if a `struct tw_bloomfilter` is empty.
107 |  *
108 |  * @param bf non-null bloomfilter to verify emptiness
109 |  *
110 |  * @return `false` if bf is null, otherwise indicator if the bloomfilter is
111 |  *         empty.
112 |  *
113 |  * @note group:bloomfilter
114 |  */
115 | bool tw_bloomfilter_empty(const struct tw_bloomfilter *bf);
116 | 
117 | /**
118 |  * Verify if a `struct tw_bloomfilter` is full.
119 |  *
120 |  * @param bf non-null bloomfilter to verify fullness
121 |  *
122 |  * @return `false` if bf is null, otherwise indicator if the bloomfilter is
123 |  *         full.
124 |  *
125 |  * @note group:bloomfilter
126 |  */
127 | bool tw_bloomfilter_full(const struct tw_bloomfilter *bf);
128 | 
129 | /**
130 |  * Count the number of active bits in a `struct tw_bloomfilter`.
131 |  *
132 |  * @param bf non-null bloomfilter to count active bits
133 |  *
134 |  * @return `0` if bf is null, otherwise the number of active bits
135 |  *
136 |  * @note group:bloomfilter
137 |  */
138 | uint64_t tw_bloomfilter_count(const struct tw_bloomfilter *bf);
139 | 
140 | /**
141 |  * Count the percentage of active bits in a `struct tw_bloomfilter`.
142 |  *
143 |  * @param bf non-null bloomfilter to count the density
144 |  *
145 |  * @return `0.0` if bf is null, otherwise the portion of active bits
146 |  *         expressed as (count / size).
147 |  *
148 |  * @note group:bloomfilter
149 |  */
150 | float tw_bloomfilter_density(const struct tw_bloomfilter *bf);
151 | 
152 | /**
153 |  * Zero all bits in a `struct tw_bloomfilter`.
154 |  *
155 |  * @param bf non-null bloomfilter to zero
156 |  *
157 |  * @return `NULL` if bf is null, otherwise a pointer to bf on successful
158 |  *         operation
159 |  *
160 |  * @note group:bloomfilter
161 |  */
162 | struct tw_bloomfilter *tw_bloomfilter_zero(struct tw_bloomfilter *bf);
163 | 
164 | /**
165 |  * Fill all bits in a `struct tw_bloomfilter`.
166 |  *
167 |  * @param bf non-null bloomfilter to fill
168 |  *
169 |  * @return `NULL` if bf is null, otherwise a pointer to bf on successful
170 |  *         operation
171 |  *
172 |  * @note group:bloomfilter
173 |  */
174 | struct tw_bloomfilter *tw_bloomfilter_fill(struct tw_bloomfilter *bf);
175 | 
176 | /**
177 |  * Invert all bits (ones and zeroes) in a `struct tw_bloomfilter`.
178 |  *
179 |  * @param bf non-null bloomfilter to inverse
180 |  *
181 |  * @return `NULL` if failed, otherwise the bloomfilter
182 |  *
183 |  * @note group:bloomfilter
184 |  */
185 | struct tw_bloomfilter *tw_bloomfilter_not(struct tw_bloomfilter *bf);
186 | 
187 | /**
188 |  * Verify if `struct tw_bloomfilter`s are equal.
189 |  *
190 |  * @param fst first non-null bloomfilter to check
191 |  * @param snd second non-null bloomfilter to check
192 |  *
193 |  * @return `false` if any bloomfilter is null or hashes are not of the same
194 |  *         cardinality, otherwise indicator if filters are equal
195 |  *
196 |  * @note group:bloomfilter
197 |  */
198 | bool tw_bloomfilter_equal(const struct tw_bloomfilter *fst,
199 |                           const struct tw_bloomfilter *snd);
200 | 
201 | /**
202 |  * Compute the union of `struct tw_bloomfilter`s.
203 |  *
204 |  * @param src non-null bloomfilter to union from
205 |  * @param dst non-null bloomfilter to union to
206 |  *
207 |  * @return `NULL` if failed, otherwise pointer to dst
208 |  *
209 |  * @note group:bloomfilter
210 |  */
211 | struct tw_bloomfilter *tw_bloomfilter_union(const struct tw_bloomfilter *src,
212 |                                             struct tw_bloomfilter *dst);
213 | 
214 | /**
215 |  * Compute the intersection of `struct tw_bloomfilter`s.
216 |  *
217 |  * @param src non-null bloomfilter to intersect from
218 |  * @param dst non-null bloomfilter to intersect to
219 |  *
220 |  * @return `NULL` if failed, otherwise pointer to dst
221 |  *
222 |  * @note group:bloomfilter
223 |  */
224 | struct tw_bloomfilter *
225 | tw_bloomfilter_intersection(const struct tw_bloomfilter *src,
226 |                             struct tw_bloomfilter *dst);
227 | 
228 | /**
229 |  * Compute the symmetric difference of `struct tw_bloomfilter`s.
230 |  *
231 |  * @param src non-null bloomfilter to xor from
232 |  * @param dst non-null bloomfilter to xor to
233 |  *
234 |  * @return `NULL` if failed, otherwise pointer to dst
235 |  *
236 |  * @note group:bloomfilter
237 |  */
238 | struct tw_bloomfilter *tw_bloomfilter_xor(const struct tw_bloomfilter *src,
239 |                                           struct tw_bloomfilter *dst);
240 | 
241 | #endif /* TWIDDLE_BLOOMFILTER_H */
242 | 


--------------------------------------------------------------------------------
/tests/test-bloomfilter.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <twiddle/bloomfilter/bloomfilter.h>
  3 | 
  4 | #include "../src/twiddle/macrology.h"
  5 | #include "test.h"
  6 | 
  7 | START_TEST(test_bloomfilter_basic)
  8 | {
  9 |   /* Keys set in a fresh filter of assorted sizes must test as present. */
 10 |   DESCRIBE_TEST;
 11 | 
 12 |   const uint32_t sizes[] = {32, 64, 128, 256, 512, 1024, 2048, 4096, 1 << 17};
 13 |   const uint32_t ks[] = {1, 2, 3, 4, 5, 6, 7, 8, 17};
 14 |   const int32_t offsets[] = {-1, 0, 1};
 15 |   const char *values[] = {"herp", "derp", "ferp", "merp"};
 16 | 
 17 |   for (size_t i = 0; i < TW_ARRAY_SIZE(sizes); ++i) {
 18 |     for (size_t j = 0; j < TW_ARRAY_SIZE(offsets); ++j) {
 19 |       const uint32_t nbits = sizes[i] + offsets[j];
 20 |       const uint32_t k = ks[i];
 21 |       struct tw_bloomfilter *bf = tw_bloomfilter_new(nbits, k);
 22 |       ck_assert_ptr_ne(bf, NULL); /* fail here, not in a later assertion */
 23 | 
 24 |       for (size_t l = 0; l < TW_ARRAY_SIZE(values); ++l) {
 25 |         const char *value = values[l];
 26 |         tw_bloomfilter_set(bf, value, strlen(value));
 27 |         ck_assert(tw_bloomfilter_test(bf, value, strlen(value)));
 28 |       }
 29 | 
 30 |       /* Prone to failure (false positive); may be removed if it is flaky. */
 31 |       const char *not_there = "oups!";
 32 |       ck_assert(!tw_bloomfilter_test(bf, not_there, strlen(not_there)));
 33 | 
 34 |       tw_bloomfilter_free(bf);
 35 |     }
 36 |   }
 37 | }
 38 | END_TEST
 39 | 
 40 | START_TEST(test_bloomfilter_copy_and_clone)
 41 | {
 42 |   /* Copies and clones must hold the source's keys yet stay independent. */
 43 |   DESCRIBE_TEST;
 44 | 
 45 |   const uint32_t sizes[] = {1024, 2048, 4096, 1 << 17};
 46 |   const uint32_t ks[] = {6, 7, 8, 17};
 47 |   const int32_t offsets[] = {-1, 0, 1};
 48 | 
 49 |   const char *values[] = {"herp", "derp", "ferp", "merp"};
 50 | 
 51 |   for (size_t i = 0; i < TW_ARRAY_SIZE(sizes); ++i) {
 52 |     for (size_t j = 0; j < TW_ARRAY_SIZE(offsets); ++j) {
 53 |       const uint32_t nbits = sizes[i] + offsets[j];
 54 |       const uint32_t k = ks[i];
 55 |       struct tw_bloomfilter *bf = tw_bloomfilter_new(nbits, k);
 56 |       ck_assert_ptr_ne(bf, NULL);
 57 | 
 58 |       for (size_t l = 0; l < TW_ARRAY_SIZE(values); ++l) {
 59 |         const char *value = values[l];
 60 |         tw_bloomfilter_set(bf, value, strlen(value));
 61 |       }
 62 | 
 63 |       struct tw_bloomfilter *copy = tw_bloomfilter_new(nbits, k);
 64 |       ck_assert_ptr_ne(copy, NULL);
 65 |       ck_assert_ptr_eq(tw_bloomfilter_copy(bf, copy), copy);
 66 |       struct tw_bloomfilter *clone = tw_bloomfilter_clone(copy);
 67 |       ck_assert_ptr_ne(clone, NULL);
 68 | 
 69 |       for (size_t l = 0; l < TW_ARRAY_SIZE(values); ++l) {
 70 |         const char *value = values[l];
 71 |         ck_assert(tw_bloomfilter_test(bf, value, strlen(value)));
 72 |         ck_assert(tw_bloomfilter_test(copy, value, strlen(value)));
 73 |         ck_assert(tw_bloomfilter_test(clone, value, strlen(value)));
 74 |       }
 75 | 
 76 |       /* Prone to failure (false positive); may be removed if it is flaky. */
 77 |       const char *not_there = "oups!";
 78 |       ck_assert(!tw_bloomfilter_test(bf, not_there, strlen(not_there)));
 79 | 
 80 |       /* Quickly validate independence: zeroing one leaves the others set. */
 81 |       tw_bloomfilter_zero(bf);
 82 |       ck_assert(tw_bloomfilter_empty(bf));
 83 |       ck_assert(!tw_bloomfilter_empty(copy));
 84 |       ck_assert(!tw_bloomfilter_empty(clone));
 85 | 
 86 |       tw_bloomfilter_zero(copy);
 87 |       ck_assert(tw_bloomfilter_empty(copy));
 88 |       ck_assert(!tw_bloomfilter_empty(clone));
 89 | 
 90 |       tw_bloomfilter_free(bf);
 91 |       tw_bloomfilter_free(copy);
 92 |       tw_bloomfilter_free(clone);
 93 |     }
 94 |   }
 95 | }
 96 | END_TEST
 97 | 
 98 | START_TEST(test_bloomfilter_set_operations)
 99 | {
100 |   /* Intersect, then union, two overlapping filters; results land in dst. */
101 |   DESCRIBE_TEST;
102 | 
103 |   const int32_t sizes[] = {1024, 2048, 4096};
104 |   const int32_t ks[] = {6, 7, 8};
105 |   const int32_t offsets[] = {-1, 0, 1};
106 |   const char *values[] = {"herp", "derp", "ferp", "merp"};
107 | 
108 |   for (size_t i = 0; i < TW_ARRAY_SIZE(sizes); ++i) {
109 |     for (size_t j = 0; j < TW_ARRAY_SIZE(offsets); ++j) {
110 |       const int32_t nbits = sizes[i] + offsets[j];
111 |       const int32_t k = ks[i];
112 |       struct tw_bloomfilter *src = tw_bloomfilter_new(nbits, k);
113 |       struct tw_bloomfilter *dst = tw_bloomfilter_new(nbits, k);
114 |       ck_assert(src != NULL && dst != NULL);
115 | 
116 |       /* src = {v0, v1, v2} and dst = {v1, v2, v3}: overlap is {v1, v2}. */
117 |       for (size_t l = 0; l + 1 < TW_ARRAY_SIZE(values); ++l) {
118 |         tw_bloomfilter_set(src, values[l], strlen(values[l]));
119 |         tw_bloomfilter_set(dst, values[l + 1], strlen(values[l + 1]));
120 |       }
121 | 
122 |       ck_assert_ptr_ne(tw_bloomfilter_intersection(src, dst), NULL);
123 |       ck_assert(!tw_bloomfilter_test(dst, values[0], strlen(values[0])));
124 |       ck_assert(tw_bloomfilter_test(dst, values[1], strlen(values[1])));
125 |       ck_assert(tw_bloomfilter_test(dst, values[2], strlen(values[2])));
126 |       ck_assert(!tw_bloomfilter_test(dst, values[3], strlen(values[3])));
127 | 
128 |       ck_assert_ptr_ne(tw_bloomfilter_union(src, dst), NULL);
129 |       ck_assert(tw_bloomfilter_test(dst, values[0], strlen(values[0])));
130 |       ck_assert(tw_bloomfilter_test(dst, values[1], strlen(values[1])));
131 |       ck_assert(tw_bloomfilter_test(dst, values[2], strlen(values[2])));
132 |       ck_assert(!tw_bloomfilter_test(dst, values[3], strlen(values[3])));
133 |       ck_assert(tw_bloomfilter_equal(src, dst));
134 | 
135 |       tw_bloomfilter_free(src);
136 |       tw_bloomfilter_free(dst);
137 |     }
138 |   }
139 | }
140 | END_TEST
141 | 
142 | START_TEST(test_bloomfilter_errors)
143 | {
144 |   /* Every entry point must tolerate NULL or otherwise invalid arguments. */
145 |   DESCRIBE_TEST;
146 | 
147 |   uint8_t k = 8;
148 |   uint64_t size = 1 << 18;
149 | 
150 |   struct tw_bloomfilter *a = tw_bloomfilter_new(size, k),
151 |                         *b = tw_bloomfilter_new(size + 1, k),
152 |                         *c = tw_bloomfilter_new(size, k + 1);
153 |   ck_assert(a != NULL && b != NULL && c != NULL);
154 | 
155 |   ck_assert_ptr_eq(tw_bloomfilter_clone(NULL), NULL);
156 |   ck_assert_ptr_eq(tw_bloomfilter_copy(a, NULL), NULL);
157 |   ck_assert_ptr_eq(tw_bloomfilter_copy(NULL, NULL), NULL);
158 |   ck_assert_ptr_eq(tw_bloomfilter_copy(a, b), NULL);
159 |   ck_assert_ptr_eq(tw_bloomfilter_copy(a, c), c);
160 | 
161 |   tw_bloomfilter_set(NULL, NULL, 0);
162 |   tw_bloomfilter_set(a, NULL, 1);
163 |   tw_bloomfilter_set(a, &k, 0);
164 | 
165 |   tw_bloomfilter_fill(a); /* all bits set: test() can only fail on bad args */
166 |   ck_assert(!tw_bloomfilter_test(NULL, NULL, 0));
167 |   ck_assert(!tw_bloomfilter_test(a, NULL, 1));
168 |   ck_assert(!tw_bloomfilter_test(a, &k, 0));
169 | 
170 |   ck_assert(!tw_bloomfilter_empty(NULL));
171 |   ck_assert(!tw_bloomfilter_full(NULL));
172 |   ck_assert_int_eq(tw_bloomfilter_count(NULL), 0);
173 |   ck_assert(tw_bloomfilter_density(NULL) == 0.0f);
174 |   ck_assert_ptr_eq(tw_bloomfilter_zero(NULL), NULL);
175 |   ck_assert_ptr_eq(tw_bloomfilter_fill(NULL), NULL);
176 |   ck_assert_ptr_eq(tw_bloomfilter_not(NULL), NULL);
177 | 
178 |   ck_assert(!tw_bloomfilter_equal(NULL, NULL));
179 |   ck_assert(!tw_bloomfilter_equal(a, NULL));
180 |   ck_assert(!tw_bloomfilter_equal(a, b));
181 |   ck_assert(!tw_bloomfilter_equal(a, c));
182 | 
183 |   ck_assert_ptr_eq(tw_bloomfilter_union(NULL, NULL), NULL);
184 |   ck_assert_ptr_eq(tw_bloomfilter_union(a, NULL), NULL);
185 |   ck_assert_ptr_eq(tw_bloomfilter_union(NULL, b), NULL);
186 | 
187 |   ck_assert_ptr_eq(tw_bloomfilter_intersection(NULL, NULL), NULL);
188 |   ck_assert_ptr_eq(tw_bloomfilter_intersection(a, NULL), NULL);
189 |   ck_assert_ptr_eq(tw_bloomfilter_intersection(NULL, b), NULL);
190 | 
191 |   ck_assert_ptr_eq(tw_bloomfilter_xor(NULL, NULL), NULL);
192 |   ck_assert_ptr_eq(tw_bloomfilter_xor(a, NULL), NULL);
193 |   ck_assert_ptr_eq(tw_bloomfilter_xor(NULL, b), NULL);
194 | 
195 |   tw_bloomfilter_free(NULL);
196 |   tw_bloomfilter_free(c);
197 |   tw_bloomfilter_free(b);
198 |   tw_bloomfilter_free(a);
199 | }
200 | END_TEST
201 | 
202 | /* Run the "bloomfilter" suite and return how many tests failed. */
203 | int run_tests()
204 | {
205 |   Suite *suite = suite_create("bloomfilter");
206 |   SRunner *sr = srunner_create(suite);
207 |   TCase *basic = tcase_create("basic");
208 | 
209 |   tcase_add_test(basic, test_bloomfilter_basic);
210 |   tcase_add_test(basic, test_bloomfilter_copy_and_clone);
211 |   tcase_add_test(basic, test_bloomfilter_set_operations);
212 |   tcase_add_test(basic, test_bloomfilter_errors);
213 |   suite_add_tcase(suite, basic);
214 | 
215 |   srunner_run_all(sr, CK_NORMAL);
216 |   const int failures = srunner_ntests_failed(sr);
217 |   srunner_free(sr);
218 |   return failures;
219 | }
220 | 
221 | int main() { return (run_tests() != 0) ? EXIT_FAILURE : EXIT_SUCCESS; }
222 | 


--------------------------------------------------------------------------------
/include/twiddle/bloomfilter/bloomfilter_a2.h:
--------------------------------------------------------------------------------
  1 | #ifndef TWIDDLE_BLOOMFILTER_A2_H
  2 | #define TWIDDLE_BLOOMFILTER_A2_H
  3 | 
  4 | #include <stdbool.h>
  5 | #include <stddef.h> /* size_t, used by the set/test prototypes below */
  6 | #include <stdint.h>
  7 | 
  8 | struct tw_bloomfilter;
  9 | 
 10 | /**
 11 |  * Aging bloomfilter with active buffers data structure
 12 |  *
 13 |  * The paper "Aging bloom filter with two active buffers for dynamic sets"
 14 |  * describes a method where 2 bloom filters are used to implement a FIFO:
 15 |  * elements are added to `active` until `density` (on active) is attained;
 16 |  * then `passive` is cleared and both filters are swapped.
 17 |  */
 18 | struct tw_bloomfilter_a2 {
 19 |   /** density threshold to trigger rotation */
 20 |   float density;
 21 |   /** pointer to active bloomfilter */
 22 |   struct tw_bloomfilter *active;
 23 |   /** pointer to passive bloomfilter */
 24 |   struct tw_bloomfilter *passive;
 25 | };
 26 | 
 27 | /**
 28 |  * Allocate a `struct tw_bloomfilter_a2`.
 29 |  *
 30 |  * @param size number of bits the bloomfilter should hold, between
 31 |  *             (0, TW_BITMAP_MAX_BITS].
 32 |  * @param k strictly positive number of hash functions used
 33 |  * @param density threshold for rotation within (0, 1]
 34 |  *
 35 |  * @return `NULL` if allocation failed, otherwise a pointer to the newly
 36 |  *         allocated `struct tw_bloomfilter_a2`
 37 |  *
 38 |  * @note group:bloomfilter_a2
 39 |  */
 40 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_new(uint64_t size, uint16_t k,
 41 |                                                 float density);
 42 | 
 43 | /**
 44 |  * Free a `struct tw_bloomfilter_a2`.
 45 |  *
 46 |  * @param bf bloomfilter to free
 47 |  *
 48 |  * @note group:bloomfilter_a2
 49 |  */
 50 | void tw_bloomfilter_a2_free(struct tw_bloomfilter_a2 *bf);
 51 | 
 52 | /**
 53 |  * Copy a source `struct tw_bloomfilter_a2` into a specified destination.
 54 |  *
 55 |  * @param src non-null bloomfilter to copy from
 56 |  * @param dst non-null bloomfilter to copy to
 57 |  *
 58 |  * @return `NULL` if any filter is null or not of the same cardinality,
 59 |  *         otherwise a pointer to dst
 60 |  *
 61 |  * @note group:bloomfilter_a2
 62 |  */
 63 | struct tw_bloomfilter_a2 *
 64 | tw_bloomfilter_a2_copy(const struct tw_bloomfilter_a2 *src,
 65 |                        struct tw_bloomfilter_a2 *dst);
 66 | 
 67 | /**
 68 |  * Clone a `struct tw_bloomfilter_a2` into a newly allocated one.
 69 |  *
 70 |  * @param bf non-null bloomfilter to clone
 71 |  *
 72 |  * @return `NULL` if failed, otherwise a newly allocated bloomfilter initialized
 73 |  *         from the requested bloomfilter. The caller is responsible to
 74 |  *         deallocate with tw_bloomfilter_a2_free
 75 |  *
 76 |  * @note group:bloomfilter_a2
 77 |  */
 78 | struct tw_bloomfilter_a2 *
 79 | tw_bloomfilter_a2_clone(const struct tw_bloomfilter_a2 *bf);
 80 | 
 81 | /**
 82 |  * Set an element in a `struct tw_bloomfilter_a2`.
 83 |  *
 84 |  * @param bf non-null bloomfilter affected
 85 |  * @param key non-null buffer of the key to add
 86 |  * @param key_size strictly positive size of the buffer key to add
 87 |  *
 88 |  * @note group:bloomfilter_a2
 89 |  */
 90 | void tw_bloomfilter_a2_set(struct tw_bloomfilter_a2 *bf, const void *key,
 91 |                            size_t key_size);
 92 | 
 93 | /**
 94 |  * Verify if an element is present in a `struct tw_bloomfilter_a2`.
 95 |  *
 96 |  * @param bf non-null bloomfilter affected
 97 |  * @param key non-null buffer of the key to test
 98 |  * @param key_size strictly positive size of the buffer key to test
 99 |  *
100 |  * @return `false` if preconditions are not met, otherwise indicator if the
101 |  *         element is in the bloomfilter (with possibility of false positives)
102 |  *
103 |  * @note group:bloomfilter_a2
104 |  */
105 | bool tw_bloomfilter_a2_test(const struct tw_bloomfilter_a2 *bf, const void *key,
106 |                             size_t key_size);
107 | 
108 | /**
109 |  * Verify if a `struct tw_bloomfilter_a2` is empty.
110 |  *
111 |  * @param bf non-null bloomfilter to verify emptiness
112 |  *
113 |  * @return `false` if bf is null, otherwise indicator if the bloomfilter is
114 |  *         empty.
115 |  *
116 |  * @note group:bloomfilter_a2
117 |  */
118 | bool tw_bloomfilter_a2_empty(const struct tw_bloomfilter_a2 *bf);
119 | 
120 | /**
121 |  * Verify if a `struct tw_bloomfilter_a2` is full.
122 |  *
123 |  * @param bf non-null bloomfilter to verify fullness
124 |  *
125 |  * @return `false` if bf is null, otherwise indicator if the bloomfilter is
126 |  *         full.
127 |  *
128 |  * @note group:bloomfilter_a2
129 |  */
130 | bool tw_bloomfilter_a2_full(const struct tw_bloomfilter_a2 *bf);
131 | 
132 | /**
133 |  * Count the number of active bits in a `struct tw_bloomfilter_a2`.
134 |  *
135 |  * @param bf non-null bloomfilter to count active bits
136 |  *
137 |  * @return `0` if bf is null, otherwise the number of active bits
138 |  *
139 |  * @note group:bloomfilter_a2
140 |  */
141 | uint64_t tw_bloomfilter_a2_count(const struct tw_bloomfilter_a2 *bf);
142 | 
143 | /**
144 |  * Count the percentage of active bits in a `struct tw_bloomfilter_a2`.
145 |  *
146 |  * @param bf non-null bloomfilter to count the density
147 |  *
148 |  * @return `0.0` if bf is null, otherwise the portion of active bits
149 |  *         expressed as (count / size).
150 |  *
151 |  * @note group:bloomfilter_a2
152 |  */
153 | float tw_bloomfilter_a2_density(const struct tw_bloomfilter_a2 *bf);
154 | 
155 | /**
156 |  * Zero all bits in a `struct tw_bloomfilter_a2`.
157 |  *
158 |  * @param bf non-null bloomfilter to zero
159 |  *
160 |  * @return `NULL` if bf is null, otherwise a pointer to bf on successful
161 |  *         operation
162 |  *
163 |  * @note group:bloomfilter_a2
164 |  */
165 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_zero(struct tw_bloomfilter_a2 *bf);
166 | 
167 | /**
168 |  * Fill all bits in a `struct tw_bloomfilter_a2`.
169 |  *
170 |  * @param bf non-null bloomfilter to fill
171 |  *
172 |  * @return `NULL` if bf is null, otherwise a pointer to bf on successful
173 |  *         operation
174 |  *
175 |  * @note group:bloomfilter_a2
176 |  */
177 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_fill(struct tw_bloomfilter_a2 *bf);
178 | 
179 | /**
180 |  * Invert all bits (ones and zeroes) in a `struct tw_bloomfilter_a2`.
181 |  *
182 |  * @param bf non-null bloomfilter to invert
183 |  *
184 |  * @return `NULL` if failed, otherwise the bloomfilter
185 |  *
186 |  * @note group:bloomfilter_a2
187 |  */
188 | struct tw_bloomfilter_a2 *tw_bloomfilter_a2_not(struct tw_bloomfilter_a2 *bf);
189 | 
190 | /**
191 |  * Verify if `struct tw_bloomfilter_a2`s are equal.
192 |  *
193 |  * @param fst first non-null bloomfilter to check
194 |  * @param snd second non-null bloomfilter to check
195 |  *
196 |  * @return `false` if any bloomfilter is null or hashes are not of the same
197 |  *         cardinality, otherwise indicator if filters are equal
198 |  *
199 |  * @note group:bloomfilter_a2
200 |  */
201 | bool tw_bloomfilter_a2_equal(const struct tw_bloomfilter_a2 *fst,
202 |                              const struct tw_bloomfilter_a2 *snd);
203 | 
204 | /**
205 |  * Compute the union of `struct tw_bloomfilter_a2`s.
206 |  *
207 |  * @param src non-null bloomfilter to union from
208 |  * @param dst non-null bloomfilter to union to
209 |  *
210 |  * @return `NULL` if failed, otherwise pointer to dst
211 |  *
212 |  * @note group:bloomfilter_a2
213 |  */
214 | struct tw_bloomfilter_a2 *
215 | tw_bloomfilter_a2_union(const struct tw_bloomfilter_a2 *src,
216 |                         struct tw_bloomfilter_a2 *dst);
217 | 
218 | /**
219 |  * Compute the intersection of `struct tw_bloomfilter_a2`s.
220 |  *
221 |  * @param src non-null bloomfilter to intersect from
222 |  * @param dst non-null bloomfilter to intersect to
223 |  *
224 |  * @return `NULL` if failed, otherwise pointer to dst
225 |  *
226 |  * @note group:bloomfilter_a2
227 |  */
228 | struct tw_bloomfilter_a2 *
229 | tw_bloomfilter_a2_intersection(const struct tw_bloomfilter_a2 *src,
230 |                                struct tw_bloomfilter_a2 *dst);
231 | 
232 | /**
233 |  * Compute the symmetric difference of `struct tw_bloomfilter_a2`s.
234 |  *
235 |  * @param src non-null bloomfilter to xor from
236 |  * @param dst non-null bloomfilter to xor to
237 |  *
238 |  * @return `NULL` if failed, otherwise pointer to dst
239 |  *
240 |  * @note group:bloomfilter_a2
241 |  */
242 | struct tw_bloomfilter_a2 *
243 | tw_bloomfilter_a2_xor(const struct tw_bloomfilter_a2 *src,
244 |                       struct tw_bloomfilter_a2 *dst);
245 | 
246 | #endif /* TWIDDLE_BLOOMFILTER_A2_H */
247 | 


--------------------------------------------------------------------------------