├── .gitignore ├── .travis.yml ├── Doxyfile ├── README.md ├── SConstruct ├── ext ├── SConscript ├── demo.php └── php_ccard.i ├── include ├── adaptive_counting.h ├── ccard_common.h ├── hyperloglog_counting.h ├── hyperloglogplus_counting.h ├── linear_counting.h ├── lookup3hash.h ├── murmurhash.h ├── register_set.h └── sparse_bitmap.h ├── src ├── SConscript ├── adaptive_counting.c ├── hyperloglog_counting.c ├── hyperloglogplus_counting.c ├── linear_counting.c ├── lookup3hash.c ├── murmurhash.c ├── register_set.c └── sparse_bitmap.c ├── t ├── SConscript ├── adaptive_counting_unittest.cc ├── hyperloglog_counting_unittest.cc ├── hyperloglogplus_counting_unittest.cc ├── linear_counting_unittest.cc ├── lookup3hash_unittest.cc ├── murmurhash_unittest.cc └── register_set_unittest.cc └── util └── indent-src /.gitignore: -------------------------------------------------------------------------------- 1 | # Object files 2 | *.o 3 | *.os 4 | 5 | # Libraries 6 | *.lib 7 | *.a 8 | 9 | # Shared objects (inc. Windows DLLs) 10 | *.dll 11 | *.so 12 | *.so.* 13 | *.dylib 14 | 15 | # Executables 16 | *.exe 17 | *.out 18 | *.app 19 | 20 | # Google test 21 | t/unittest 22 | 23 | # SConstruct caches 24 | .scons* 25 | 26 | # Temporary files 27 | *~ 28 | *.orig 29 | 30 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: php 2 | php: 3 | - 5.3 4 | - 5.4 5 | - 5.5 6 | - 5.6 7 | 8 | before_script: 9 | - sudo apt-get install libgtest-dev swig 10 | - cd /usr/src/gtest; sudo cmake .; sudo make; sudo mv libg* /usr/lib/; cd - 11 | 12 | # As we need compiling PHP extension, language must be set to php, which means 13 | # compiler: option doesn't work. So we have to manually setup gcc/clang 14 | # compiler matrix here. 15 | # FIXME: clang compiler not working in PHP vm anymore, need a solution... 
16 | #env: 17 | # - CC=gcc CXX=g++ 18 | # - CC=clang CXX=clang++ 19 | 20 | script: scons && scons test && scons install --prefix=/tmp/test && scons install-php --prefix=/tmp/test 21 | 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Stories in Ready](https://badge.waffle.io/chaoslawful/ccard-lib.png)](https://waffle.io/chaoslawful/ccard-lib) 2 | 3 | [![Build Status](https://travis-ci.org/chaoslawful/ccard-lib.png)](https://travis-ci.org/chaoslawful/ccard-lib) 4 | 5 | C library for estimating cardinality in data streams, in which case it is 6 | infeasible to store all events in memory. 7 | 8 | This library implements a series of cardinality estimating algorithms such as 9 | Linear Counting, LogLog Counting, HyperLogLog Counting and Adaptive Counting. 10 | For more information about these algorithms please read the 11 | [Reference](#reference) section. 12 | 13 | ## Building 14 | 15 | Building ccard-lib needs [scons](http://www.scons.org/). Please read [scons 16 | user guide](http://www.scons.org/doc/production/HTML/scons-user/index.html) for 17 | more information about it. 18 | 19 | Building PHP extension of ccard-lib needs [SWIG](http://www.swig.org/) to be 20 | installed. Running unit-tests needs 21 | [googletest](https://github.com/google/googletest) to be installed. 22 | 23 | ### Building as Library 24 | 25 | Assuming you have scons installed, just build ccard-lib like this: 26 | 27 | ```bash 28 | scons install 29 | ``` 30 | 31 | Scons will build and install ccard-lib to your system. 32 | 33 | You can also run unit-tests to make sure the library works as expected: 34 | 35 | ```bash 36 | scons test 37 | ``` 38 | 39 | By default ccard-lib will be installed at `/usr/local/lib`; if you want to 40 | change the install directory please replace the "libdir" setting in 41 | `SConstruct` file with your target directory. 
42 | 43 | ### Building as PHP Extension 44 | 45 | The following command will build and install the ccard-lib PHP extension: 46 | 47 | ```bash 48 | scons install-php 49 | ``` 50 | 51 | [SWIG](http://www.swig.org) is used to generate the PHP extension; please install 52 | it before running this command. 53 | 54 | ### Uninstall 55 | 56 | If you want to uninstall ccard-lib from your system, use the following 57 | commands: 58 | 59 | ```bash 60 | scons -c install-php 61 | scons -c install 62 | ``` 63 | 64 | ## Synopsis 65 | 66 | ### Estimate Cardinality 67 | 68 | ```c 69 | #include "ccard_common.h" 70 | #include "adaptive_counting.h" 71 | 72 | int main(int argc, char **argv) { 73 | int64_t i, esti; 74 | 75 | /* construct context for cardinality estimator */ 76 | /* use xxx_cnt_init to construct context */ 77 | adp_cnt_ctx_t *ctx = adp_cnt_init(NULL, 16, CCARD_HASH_MURMUR); 78 | 79 | printf("Adaptive Counting with Murmurhash:\n"); 80 | 81 | /* add 500,000 elements to set */ 82 | for (i = 1; i <= 500000L; i++) { 83 | /* use xxx_cnt_offer to add new element to set */ 84 | adp_cnt_offer(ctx, &i, sizeof(int64_t)); 85 | 86 | /* print the estimate each time another 50,000 elements have been added */ 87 | if (i % 50000 == 0) { 88 | /* use xxx_cnt_card to get estimate result */ 89 | esti = adp_cnt_card(ctx); 90 | printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n", 91 | (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100); 92 | } 93 | } 94 | printf("\n"); 95 | 96 | /* use xxx_cnt_fini to destroy context */ 97 | adp_cnt_fini(ctx); 98 | } 99 | ``` 100 | 101 | ### Merge Bitmaps 102 | ```c 103 | #include "ccard_common.h" 104 | #include "adaptive_counting.h" 105 | 106 | int main(int argc, char **argv) { 107 | int64_t i, esti; 108 | 109 | /* for merging, contexts must have same length of bitmap and hash algorithm */ 110 | adp_cnt_ctx_t *ctx = adp_cnt_init(NULL, 16, CCARD_HASH_LOOKUP3); 111 | adp_cnt_ctx_t *tbm1 = adp_cnt_init(NULL, 16, CCARD_HASH_LOOKUP3); 112 | adp_cnt_ctx_t 
*tbm2 = adp_cnt_init(NULL, 16, CCARD_HASH_LOOKUP3); 113 | 114 | int32_t m = 1 << 16; 115 | /* bitmaps */ 116 | uint8_t buf1[m + 3], buf2[m + 3]; 117 | uint32_t len1 = m + 3, len2 = m + 3; 118 | 119 | for (i = 1; i <= 20000L; i++) { 120 | adp_cnt_offer(ctx, &i, sizeof(uint64_t)); 121 | } 122 | for (i = 10000L; i <= 30000L; i++) { 123 | adp_cnt_offer(tbm1, &i, sizeof(uint64_t)); 124 | } 125 | /* use xxx_cnt_get_bytes to get bitmap from context */ 126 | adp_cnt_get_bytes(tbm1, buf1, &len1); 127 | for (i = 20000L; i <= 40000L; i++) { 128 | adp_cnt_offer(tbm2, &i, sizeof(uint64_t)); 129 | } 130 | adp_cnt_get_bytes(tbm2, buf2, &len2); 131 | 132 | /* use xxx_cnt_merge_bytes to merge bitmaps to context */ 133 | adp_cnt_merge_bytes(ctx, buf1, len1, buf2, len2, NULL); 134 | esti = adp_cnt_card(ctx); 135 | 136 | printf("actual:40000, estimated: %9lu, error: %+7.2f%%\n", 137 | (long unsigned int)esti, (double)(esti - 40000) / 40000 * 100); 138 | 139 | adp_cnt_fini(tbm2); 140 | adp_cnt_fini(tbm1); 141 | adp_cnt_fini(ctx); 142 | } 143 | ``` 144 | 145 | ## For Developers 146 | 147 | Source codes should always be formatted before committing by running script 148 | `util/indent-src` in top-dir. It utilized 149 | [astyle](http://astyle.sourceforge.net/) to do the job, so you probably want to 150 | install it first. **Make sure you install astyle v2.03 or later**, as the 151 | indenting result differs from previous versions ([see 152 | here](http://astyle.sourceforge.net/news.html) for details) 153 | 154 | ## Reference 155 | 156 | ### Linear Counting 157 | 158 | * K.-Y. Whang, B. T. Vander-Zanden, and H. M. Taylor. **[A Linear-Time 159 | Probabilistic Counting Algorithm for Database Applications] 160 | (http://dblab.kaist.ac.kr/Publication/pdf/ACM90_TODS_v15n2.pdf)**. ACM 161 | Transactions on Database Systems, 15(2):208-229, 1990. 162 | 163 | ### LogLog Counting and Adaptive Counting 164 | 165 | * Marianne Durand and Philippe Flajolet. 
**[LogLog counting of large 166 | cardinalities](http://algo.inria.fr/flajolet/Publications/DuFl03-LNCS.pdf)**. 167 | In ESA03, volume 2832 of LNCS, pages 605-617, 2003. 168 | * Min Cai, Jianping Pan, Yu K. Kwok, and Kai Hwang. **[Fast and accurate 169 | traffic matrix measurement using adaptive cardinality counting](http://gridsec.usc.edu/files/tr/tr-2005-12.pdf)**. 170 | In 171 | MineNet '05: Proceedings of the 2005 ACM SIGCOMM workshop on 172 | Mining network data, pages 205-206, New York, NY, USA, 2005. ACM. 173 | 174 | ### HyperLogLog Counting and HyperLogLog++ Counting 175 | 176 | * P. Flajolet, E. Fusy, O. Gandouet, and F. Meunier. 177 | **[Hyperloglog: The analysis of a near-optimal cardinality 178 | estimation algorithm](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf)**. 179 | Disc. Math. and Theor. Comp. Sci., AH:127-146, 2007. 180 | * Stefan Heule, Marc Nunkesser, Alex Hall. **[HyperLogLog in Practice: 181 | Algorithmic Engineering of a State of The Art Cardinality Estimation 182 | Algorithm](http://static.googleusercontent.com/media/research.google.com/zh-CN//pubs/archive/40671.pdf)**. 183 | In Proceedings of the EDBT 2013 Conference, ACM, Genoa, Italy. 184 | 185 | The implementation refers to [stream-lib](https://github.com/clearspring/stream-lib). 
186 | 187 | ## Experiment 188 | 189 | The following estimating results is calculated using bitmap with length of 2^16 190 | (64k) bytes: 191 | 192 | ```bash 193 | Linear Counting with Murmurhash: 194 | actual: 50000, estimated: 50062, error: 0.12% 195 | actual: 100000, estimated: 99924, error: 0.08% 196 | actual: 150000, estimated: 149865, error: 0.09% 197 | actual: 200000, estimated: 199916, error: 0.04% 198 | actual: 250000, estimated: 250123, error: 0.05% 199 | actual: 300000, estimated: 299942, error: 0.02% 200 | actual: 350000, estimated: 349801, error: 0.06% 201 | actual: 400000, estimated: 400101, error: 0.03% 202 | actual: 450000, estimated: 449955, error: 0.01% 203 | actual: 500000, estimated: 500065, error: 0.01% 204 | 205 | Linear Counting with Lookup3hash: 206 | actual: 50000, estimated: 49835, error: 0.33% 207 | actual: 100000, estimated: 99461, error: 0.54% 208 | actual: 150000, estimated: 149006, error: 0.66% 209 | actual: 200000, estimated: 198501, error: 0.75% 210 | actual: 250000, estimated: 248365, error: 0.65% 211 | actual: 300000, estimated: 298065, error: 0.65% 212 | actual: 350000, estimated: 347504, error: 0.71% 213 | actual: 400000, estimated: 397292, error: 0.68% 214 | actual: 450000, estimated: 446700, error: 0.73% 215 | actual: 500000, estimated: 495944, error: 0.81% 216 | 217 | Hyperloglog Counting with Murmurhash: 218 | actual: 50000, estimated: 50015, error: 0.03% 219 | actual: 100000, estimated: 100048, error: 0.05% 220 | actual: 150000, estimated: 149709, error: 0.19% 221 | actual: 200000, estimated: 201595, error: 0.80% 222 | actual: 250000, estimated: 250168, error: 0.07% 223 | actual: 300000, estimated: 299864, error: 0.05% 224 | actual: 350000, estimated: 348571, error: 0.41% 225 | actual: 400000, estimated: 398583, error: 0.35% 226 | actual: 450000, estimated: 448632, error: 0.30% 227 | actual: 500000, estimated: 498330, error: 0.33% 228 | 229 | Hyperloglog Counting with Lookup3hash: 230 | actual: 50000, estimated: 49628, 
error: 0.74% 231 | actual: 100000, estimated: 99357, error: 0.64% 232 | actual: 150000, estimated: 148880, error: 0.75% 233 | actual: 200000, estimated: 200475, error: 0.24% 234 | actual: 250000, estimated: 249362, error: 0.26% 235 | actual: 300000, estimated: 299119, error: 0.29% 236 | actual: 350000, estimated: 349225, error: 0.22% 237 | actual: 400000, estimated: 398805, error: 0.30% 238 | actual: 450000, estimated: 448373, error: 0.36% 239 | actual: 500000, estimated: 498183, error: 0.36% 240 | 241 | Adaptive Counting with Murmurhash: 242 | actual: 50000, estimated: 50015, error: 0.03% 243 | actual: 100000, estimated: 100048, error: 0.05% 244 | actual: 150000, estimated: 149709, error: 0.19% 245 | actual: 200000, estimated: 201059, error: 0.53% 246 | actual: 250000, estimated: 249991, error: 0.00% 247 | actual: 300000, estimated: 300067, error: 0.02% 248 | actual: 350000, estimated: 349610, error: 0.11% 249 | actual: 400000, estimated: 399875, error: 0.03% 250 | actual: 450000, estimated: 450348, error: 0.08% 251 | actual: 500000, estimated: 500977, error: 0.20% 252 | 253 | Adaptive Counting with Lookup3hash: 254 | actual: 50000, estimated: 49628, error: 0.74% 255 | actual: 100000, estimated: 99357, error: 0.64% 256 | actual: 150000, estimated: 148880, error: 0.75% 257 | actual: 200000, estimated: 199895, error: 0.05% 258 | actual: 250000, estimated: 249563, error: 0.17% 259 | actual: 300000, estimated: 299047, error: 0.32% 260 | actual: 350000, estimated: 348665, error: 0.38% 261 | actual: 400000, estimated: 399266, error: 0.18% 262 | actual: 450000, estimated: 450196, error: 0.04% 263 | actual: 500000, estimated: 499516, error: 0.10% 264 | 265 | Loglog Counting with Murmurhash: 266 | actual: 50000, estimated: 59857, error: 19.71% 267 | actual: 100000, estimated: 103108, error: 3.11% 268 | actual: 150000, estimated: 150917, error: 0.61% 269 | actual: 200000, estimated: 201059, error: 0.53% 270 | actual: 250000, estimated: 249991, error: 0.00% 271 | actual: 
300000, estimated: 300067, error: 0.02% 272 | actual: 350000, estimated: 349610, error: 0.11% 273 | actual: 400000, estimated: 399875, error: 0.03% 274 | actual: 450000, estimated: 450348, error: 0.08% 275 | actual: 500000, estimated: 500977, error: 0.20% 276 | 277 | Loglog Counting with Lookup3hash: 278 | actual: 50000, estimated: 59870, error: 19.74% 279 | actual: 100000, estimated: 103044, error: 3.04% 280 | actual: 150000, estimated: 150435, error: 0.29% 281 | actual: 200000, estimated: 199895, error: 0.05% 282 | actual: 250000, estimated: 249563, error: 0.17% 283 | actual: 300000, estimated: 299047, error: 0.32% 284 | actual: 350000, estimated: 348665, error: 0.38% 285 | actual: 400000, estimated: 399266, error: 0.18% 286 | actual: 450000, estimated: 450196, error: 0.04% 287 | actual: 500000, estimated: 499516, error: 0.10% 288 | 289 | HyperloglogPlus Counting with Murmurhash 64bit: 290 | actual: 50000, estimated: 49801, error: 0.40% 291 | actual: 100000, estimated: 101098, error: 1.10% 292 | actual: 150000, estimated: 151488, error: 0.99% 293 | actual: 200000, estimated: 201337, error: 0.67% 294 | actual: 250000, estimated: 252130, error: 0.85% 295 | actual: 300000, estimated: 301995, error: 0.66% 296 | actual: 350000, estimated: 352194, error: 0.63% 297 | actual: 400000, estimated: 402413, error: 0.60% 298 | actual: 450000, estimated: 454293, error: 0.95% 299 | actual: 500000, estimated: 503228, error: 0.65% 300 | ``` 301 | -------------------------------------------------------------------------------- /SConstruct: -------------------------------------------------------------------------------- 1 | AddOption('--prefix', 2 | dest='prefix', 3 | nargs=1, type='string', 4 | action='store', 5 | metavar='DIR', 6 | help='Installation prefix') 7 | 8 | # build C source 9 | SConscript('src/SConscript') 10 | 11 | ############################################################# 12 | # swig for php make and install 13 | if "install-php" in COMMAND_LINE_TARGETS: 14 | 
SConscript('ext/SConscript') 15 | 16 | ############################################################# 17 | # build unit-test 18 | SConscript('t/SConscript') 19 | 20 | # vi:ft=python ts=4 sw=4 et fdm=marker 21 | -------------------------------------------------------------------------------- /ext/SConscript: -------------------------------------------------------------------------------- 1 | # swig for php make and install 2 | import os 3 | import commands 4 | 5 | env = Environment( 6 | PREFIX = GetOption('prefix'), 7 | ENV = {'PATH' : os.environ['PATH']}, 8 | SWIGFLAGS=['-php'], 9 | SHLIBPREFIX="" 10 | ) 11 | env["CC"] = os.getenv("CC") or env["CC"] 12 | env["CXX"] = os.getenv("CXX") or env["CXX"] 13 | env["ENV"].update(x for x in os.environ.items() if x[0].startswith("CCC_")) 14 | 15 | env.ParseConfig("php-config --includes") 16 | phpLibExtDir = '$PREFIX/' + commands.getoutput("php-config --extension-dir") 17 | 18 | link_flags = [] 19 | # add extra linking options under OSX 20 | if env['PLATFORM'] == 'darwin': 21 | link_flags.append('-Wl,-undefined,dynamic_lookup') 22 | 23 | swigSl = env.SharedLibrary( 24 | '../php_card.so', 25 | ['php_ccard.i'], 26 | LIBS=['ccard-lib'], 27 | LIBPATH=['#'], 28 | CCFLAGS=["-Iinclude"], 29 | LINKFLAGS=link_flags, 30 | ) 31 | 32 | phpLib = env.Install(phpLibExtDir, [swigSl]) 33 | env.Alias('install-php', phpLib) 34 | env.Clean(swigSl, ["ccard.php", "php_ccard.h"]) 35 | 36 | # vi:ft=python ts=4 sw=4 et fdm=marker 37 | -------------------------------------------------------------------------------- /ext/demo.php: -------------------------------------------------------------------------------- 1 | = 0); 18 | } 19 | echo "Adaptive counting estimated: ", adp_cnt_card($ctx1), "\n"; 20 | echo "Loglog countinng estimated: ", adp_cnt_card_loglog($ctx1), "\n"; 21 | 22 | echo "Initialize adaptive counting context 2 (k=13 or len=2^13) ...\n"; 23 | $ctx2 = adp_cnt_init(13, CCARD_HASH_MURMUR64); 24 | assert(!is_null($ctx2)); 25 | 26 | echo "Count 
distinct items ...\n"; 27 | for($i = 1; $i < 300; $i++) { 28 | $is_new = adp_cnt_offer($ctx2, "$i"); 29 | assert($is_new >= 0); 30 | } 31 | echo "Adaptive counting estimated: ", adp_cnt_card($ctx2), "\n"; 32 | echo "Loglog countinng estimated: ", adp_cnt_card_loglog($ctx2), "\n"; 33 | 34 | echo "Reset counting context 1 ...\n"; 35 | $rc = adp_cnt_reset($ctx1); 36 | assert($rc == 0); 37 | 38 | echo "Recount distinct items ...\n"; 39 | for($i = 100; $i < 1000; $i++) { 40 | $is_new = adp_cnt_offer($ctx1, "$i"); 41 | assert($is_new >= 0); 42 | } 43 | echo "Adaptive counting estimated: ", adp_cnt_card($ctx1), "\n"; 44 | echo "Loglog countinng estimated: ", adp_cnt_card_loglog($ctx1), "\n"; 45 | 46 | echo "Merging two counting contexts ...\n"; 47 | $rc = adp_cnt_merge($ctx1, $ctx2); 48 | assert($rc == 0); 49 | 50 | echo "Adaptive counting estimated: ", adp_cnt_card($ctx1), "\n"; 51 | echo "Loglog countinng estimated: ", adp_cnt_card_loglog($ctx1), "\n"; 52 | 53 | echo "Release counting contexts ...\n"; 54 | $rc = adp_cnt_fini($ctx1); 55 | assert($rc == 0); 56 | $rc = adp_cnt_fini($ctx2); 57 | assert($rc == 0); 58 | 59 | ?> 60 | 61 | -------------------------------------------------------------------------------- /ext/php_ccard.i: -------------------------------------------------------------------------------- 1 | %module ccard 2 | %{ 3 | #include "adaptive_counting.h" 4 | struct adp_cnt_ctx_s { 5 | int err; 6 | uint8_t k; 7 | uint8_t hf; 8 | uint8_t sidx_len; 9 | double Ca; 10 | uint32_t m; 11 | uint32_t Rsum; 12 | uint32_t b_e; 13 | uint32_t bmp_len; 14 | uint8_t *M; 15 | }; 16 | %} 17 | 18 | #if defined(SWIGPHP) 19 | 20 | %typemap(in) uint32_t %{ 21 | convert_to_long_ex($input); 22 | $1 = (uint32_t) Z_LVAL_PP($input); 23 | %} 24 | 25 | %typemap(in) uint8_t %{ 26 | convert_to_long_ex($input); 27 | $1 = (uint8_t) Z_LVAL_PP($input); 28 | %} 29 | 30 | %typemap(in) (const void* buf, uint32_t len) %{ 31 | $1 = Z_STRVAL_PP($input); 32 | $2 = Z_STRLEN_PP($input); 33 | %} 34 | 
35 | %typemap(in, numinputs=0) const void* IGNORE %{ 36 | $1 = NULL; 37 | %} 38 | 39 | %typemap(out) int64_t %{ 40 | ZVAL_LONG($result, $1); 41 | %} 42 | 43 | adp_cnt_ctx_t* adp_cnt_init(const void *IGNORE, uint32_t len_or_k, uint8_t hf); 44 | int64_t adp_cnt_card_loglog(adp_cnt_ctx_t *ctx); 45 | int64_t adp_cnt_card(adp_cnt_ctx_t *ctx); 46 | int adp_cnt_offer(adp_cnt_ctx_t *ctx, const void *buf, uint32_t len); 47 | int adp_cnt_reset(adp_cnt_ctx_t *ctx); 48 | 49 | %typemap(in, numinputs=1) (adp_cnt_ctx_t * ctx, void* buf, uint32_t *len) (int tmp_len)%{ 50 | if(SWIG_ConvertPtr(*$input, (void **) &$1, SWIGTYPE_p_adp_cnt_ctx_t, 0) < 0) { 51 | SWIG_PHP_Error(E_ERROR, "Type error in argument 1 of adp_cnt_get_bytes. Expected SWIGTYPE_p_adp_cnt_ctx_t"); 52 | } 53 | $3 = (uint32_t *)&tmp_len; 54 | $2 = alloca($1->m + 3); 55 | *$3 = $1->m + 3; 56 | %} 57 | 58 | %typemap(argout) (adp_cnt_ctx_t* ctx, void* buf, uint32_t *len) %{ 59 | if(result < 0) { 60 | ZVAL_NULL($result); 61 | } else { 62 | ZVAL_STRINGL($result, $2, *$3, 1); 63 | } 64 | %} 65 | 66 | %feature("action") adp_cnt_get_bytes%{ 67 | int result = adp_cnt_get_bytes(arg1,arg2,arg3); 68 | %} 69 | void adp_cnt_get_bytes(adp_cnt_ctx_t *ctx, void *buf, uint32_t *len); 70 | 71 | int adp_cnt_merge(adp_cnt_ctx_t *ctx, adp_cnt_ctx_t *tbm, ...); 72 | int adp_cnt_merge_bytes(adp_cnt_ctx_t *ctx, const void *buf, uint32_t len, ...); 73 | int adp_cnt_fini(adp_cnt_ctx_t *ctx); 74 | int adp_cnt_errnum(adp_cnt_ctx_t *ctx); 75 | const char* adp_cnt_errstr(int errn); 76 | 77 | #endif 78 | -------------------------------------------------------------------------------- /include/adaptive_counting.h: -------------------------------------------------------------------------------- 1 | #ifndef ADAPTIVE_COUNTING_H__ 2 | #define ADAPTIVE_COUNTING_H__ 3 | 4 | #include "ccard_common.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | /** 11 | * Opaque adaptive counting context type 12 | * */ 13 | typedef struct adp_cnt_ctx_s 
adp_cnt_ctx_t; 14 | 15 | /** 16 | * Initialize adaptive counting context with optional raw bitmap. 17 | * 18 | * @param[in] obuf Pointer to the raw bitmap (no header). NULL if there's 19 | * none. 20 | * @param[in] len_or_k The length of the bitmap if obuf is not NULL; 21 | * otherwise it's the base-2 logarithm of the bitmap length (no header). 22 | * @param[in] opt Additional options or'd together, including the hash function 23 | * to be applied to elements and the bitmap storage format. 24 | * 25 | * @retval not-NULL An initialized context to be used with the rest of 26 | * the methods. 27 | * @retval NULL If an error occurred. 28 | * 29 | * @see adp_cnt_fini, adp_cnt_init 30 | * */ 31 | adp_cnt_ctx_t *adp_cnt_raw_init(const void *obuf, uint32_t len_or_k, 32 | uint8_t opt); 33 | 34 | /** 35 | * Initialize adaptive counting context with optional serialized bitmap. 36 | * 37 | * @param[in] obuf Pointer to the serialized bitmap (with a 3-byte header). 38 | * NULL if there's none. 39 | * @param[in] len_or_k The length of the bitmap if obuf is not NULL; 40 | * otherwise it's the base-2 logarithm of the bitmap length (excluding the 3 41 | * header bytes). 42 | * @param[in] opt Additional options or'd together, including the hash function 43 | * to be applied to elements and the bitmap storage format. 44 | * 45 | * @retval not-NULL An initialized context to be used with the rest of 46 | * the methods. 47 | * @retval NULL If an error occurred. 48 | * 49 | * @see adp_cnt_fini, adp_cnt_raw_init 50 | * */ 51 | adp_cnt_ctx_t *adp_cnt_init(const void *obuf, uint32_t len_or_k, 52 | uint8_t opt); 53 | 54 | /** 55 | * Retrieve the cardinality calculated from bitmap in the context using 56 | * LogLog Counting. 57 | * 58 | * @param[in] ctx Pointer to the context. 59 | * 60 | * @retval >=0 Calculated cardinality based on bitmap in the context if 61 | * success. 62 | * @retval -1 If an error occurred. 
63 | * 64 | * @see adp_cnt_offer, adp_cnt_reset 65 | * */ 66 | int64_t adp_cnt_card_loglog(adp_cnt_ctx_t *ctx); 67 | 68 | /** 69 | * Retrieve the cardinality calculated from bitmap in the context using 70 | * Adaptive Counting. 71 | * 72 | * @param[in] ctx Pointer to the context. 73 | * 74 | * @retval >=0 Calculated cardinality based on bitmap in the context if 75 | * success. 76 | * @retval -1 If error occured. 77 | * 78 | * @see adp_cnt_offer, adp_cnt_reset 79 | * */ 80 | int64_t adp_cnt_card(adp_cnt_ctx_t *ctx); 81 | 82 | /** 83 | * Offer a object to be distinct counted. 84 | * 85 | * @param[in,out] ctx Pointer to the context. 86 | * @param[in] buf Pointer to the buffer storing object. 87 | * @param[in] len The length of the buffer. 88 | * 89 | * @retval 1 If the object affected final counting. 90 | * @retval 0 If final counting isn't affected by the object. 91 | * @retval -1 If error occured. 92 | * 93 | * @see adp_cnt_card, adp_cnt_reset 94 | * */ 95 | int adp_cnt_offer(adp_cnt_ctx_t *ctx, const void *buf, 96 | uint32_t len); 97 | 98 | /** 99 | * Reset bitmap in the context, effectively clear cardinality to zero. 100 | * 101 | * @param[in,out] ctx Pointer to the context. 102 | * 103 | * @retval 0 If success. 104 | * @retval -1 If error occured. 105 | * 106 | * @see adp_cnt_card, adp_cnt_offer 107 | * */ 108 | int adp_cnt_reset(adp_cnt_ctx_t *ctx); 109 | 110 | /** 111 | * Get the raw bitmap or bitmap length from context. 112 | * 113 | * @param[in] ctx Pointer to the context. 114 | * @param[out] buf Pointer to buffer storing returning bitmap. NULL if only 115 | * bitmap length is needed. 116 | * @param[out] len Pointer to variable storing returning bitmap length. 117 | * 118 | * @retval 0 If success. 119 | * @retval -1 If error occured. 
120 | * 121 | * @see adp_cnt_merge, adp_cnt_merge_bytes, adp_cnt_get_bytes 122 | * */ 123 | int adp_cnt_get_raw_bytes(adp_cnt_ctx_t *ctx, void *buf, 124 | uint32_t *len); 125 | 126 | /** 127 | * Get the serialized bitmap or bitmap length from context. 128 | * 129 | * @param[in] ctx Pointer to the context. 130 | * @param[out] buf Pointer to buffer storing returning bitmap. NULL if only 131 | * bitmap length is needed. 132 | * @param[out] len Pointer to variable storing returning bitmap length. 133 | * 134 | * @retval 0 If success. 135 | * @retval -1 If error occured. 136 | * 137 | * @see adp_cnt_merge, adp_cnt_merge_bytes, adp_cnt_get_raw_bytes 138 | * */ 139 | int adp_cnt_get_bytes(adp_cnt_ctx_t *ctx, void *buf, 140 | uint32_t *len); 141 | 142 | /** 143 | * Merge several adaptive counting context into the current one, 144 | * effectively combined all distinct countings. 145 | * 146 | * Usage: 147 | * @code{c} 148 | * if(adp_cnt_merge(ctx, ctx_1, ctx_2, ..., ctx_n, NULL)) { 149 | * printf("Failed to merge contexts: %s", 150 | * adp_cnt_errstr(adp_cnt_errnum(ctx))); 151 | * } 152 | * @endcode 153 | * 154 | * @note All context to be merged must be of the same bitmap length, 155 | * otherwise error will be returned! 156 | * 157 | * @param[in,out] ctx Pointer to the context merging to. 158 | * @param[in] tbm Pointer to the first context to be merged. The rest 159 | * contexts will be listed sequentially with a ending NULL. 160 | * 161 | * @retval 0 if all were merged successfully. 162 | * @retval -1 if error occured. 163 | * 164 | * @see adp_cnt_merge_bytes, adp_cnt_get_bytes 165 | * */ 166 | int adp_cnt_merge(adp_cnt_ctx_t *ctx, adp_cnt_ctx_t *tbm, 167 | ...); 168 | 169 | /** 170 | * Merge several adaptive counting raw bitmaps into the current context, 171 | * effectively combined all distinct countings. 
172 | * 173 | * Usage: 174 | * @code{c} 175 | * if(adp_cnt_merge_raw_bytes(ctx, buf_1, len_1, buf_2, len_2, 176 | * ..., buf_n, len_n, NULL)) { 177 | * printf("Failed to merge bitmaps: %s", 178 | * adp_cnt_errstr(adp_cnt_errnum(ctx))); 179 | * } 180 | * @endcode 181 | * 182 | * @note All bitmaps to be merged must be of the same length with the 183 | * bitmap in current context, otherwise error will be returned! 184 | * 185 | * @param[in,out] ctx Pointer to the context merging to. 186 | * @param[in] buf Pointer to the first bitmap to be merged. 187 | * @param[in] len Length of the first bitmap to be merged. The rest buf/len 188 | * pairs will be listed sequentially with a ending NULL. 189 | * 190 | * @retval 0 if all were merged successfully. 191 | * @retval -1 if error occured. 192 | * 193 | * @see adp_cnt_merge, adp_cnt_get_raw_bytes, adp_cnt_merge_bytes 194 | * */ 195 | int adp_cnt_merge_raw_bytes(adp_cnt_ctx_t *ctx, 196 | const void *buf, uint32_t len, 197 | ...); 198 | /** 199 | * Merge several adaptive counting bitmaps into the current context, 200 | * effectively combined all distinct countings. 201 | * 202 | * Usage: 203 | * @code{c} 204 | * if(adp_cnt_merge_bytes(ctx, buf_1, len_1, buf_2, len_2, 205 | * ..., buf_n, len_n, NULL)) { 206 | * printf("Failed to merge bitmaps: %s", 207 | * adp_cnt_errstr(adp_cnt_errnum(ctx))); 208 | * } 209 | * @endcode 210 | * 211 | * @note All bitmaps to be merged must be of the same length with the 212 | * bitmap in current context, otherwise error will be returned! 213 | * 214 | * @param[in,out] ctx Pointer to the context merging to. 215 | * @param[in] buf Pointer to the first bitmap to be merged. 216 | * @param[in] len Length of the first bitmap to be merged. The rest buf/len 217 | * pairs will be listed sequentially with a ending NULL. 218 | * 219 | * @retval 0 if all were merged successfully. 220 | * @retval -1 if error occured. 
221 | * 222 | * @see adp_cnt_merge, adp_cnt_get_bytes, adp_cnt_merge_raw_bytes 223 | * */ 224 | int adp_cnt_merge_bytes(adp_cnt_ctx_t *ctx, 225 | const void *buf, uint32_t len, 226 | ...); 227 | 228 | /** 229 | * Finalize and release resources of the given adaptive counting context. 230 | * 231 | * @param[in] ctx Pointer to the context to release. 232 | * 233 | * @retval 0 if finalized successfully. 234 | * @retval -1 if error occured. 235 | * 236 | * @see adp_cnt_init 237 | * */ 238 | int adp_cnt_fini(adp_cnt_ctx_t *ctx); 239 | 240 | /** 241 | * Get error status of the given context. 242 | * 243 | * @param[in] ctx Pointer to the context. 244 | * 245 | * @retval <=0 Error number in the context. 246 | * 247 | * @see adp_cnt_errstr 248 | * */ 249 | int adp_cnt_errnum(adp_cnt_ctx_t *ctx); 250 | 251 | /** 252 | * Convert error status to human-friendly message. 253 | * 254 | * @param[in] errn Error number returned by adp_cnt_errnum. 255 | * 256 | * @retval not-NULL Corresponding message string. 257 | * @retval NULL Invalid error number. 
258 | * 259 | * @see adp_cnt_errnum 260 | * */ 261 | const char *adp_cnt_errstr(int errn); 262 | 263 | /** 264 | * Adaptive counting algorithm definition 265 | * */ 266 | extern ccard_algo_t *adp_algo; 267 | 268 | #ifdef __cplusplus 269 | } 270 | #endif 271 | 272 | #endif 273 | 274 | /* vi:ft=c ts=4 sw=4 fdm=marker et 275 | * */ 276 | 277 | -------------------------------------------------------------------------------- /include/ccard_common.h: -------------------------------------------------------------------------------- 1 | #ifndef CCARD_COMMON_H__ 2 | #define CCARD_COMMON_H__ 3 | 4 | #include <stdint.h> 5 | #include "sparse_bitmap.h" 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | /** 12 | * Predefined error codes 13 | * */ 14 | enum { 15 | CCARD_OK = 0, /**< No error */ 16 | CCARD_ERR_INVALID_CTX = -1, /**< Invalid algorithm context */ 17 | CCARD_ERR_MERGE_FAILED = -2, /**< Merge failed */ 18 | CCARD_ERR_INVALID_ARGUMENT = -3, /**< Invalid argument */ 19 | CCARD_ERR_PLACEHOLDER 20 | }; 21 | 22 | /** 23 | * Algorithms 24 | * */ 25 | enum { 26 | CCARD_ALGO_ADAPTIVE = 1, 27 | CCARD_ALGO_HYPERLOGLOG = 2, 28 | CCARD_ALGO_LINEAR = 3, 29 | CCARD_ALGO_HYPERLOGLOGPLUS = 4, 30 | CCARD_ALGO_PLACEHOLDER 31 | }; 32 | 33 | /** 34 | * Hash functions 35 | * */ 36 | enum { 37 | CCARD_HASH_MURMUR = 1, 38 | CCARD_HASH_LOOKUP3 = 2, 39 | CCARD_HASH_MURMUR64 = 3, 40 | CCARD_HASH_PLACEHOLDER 41 | }; 42 | 43 | /** 44 | * C-card algorithm definition 45 | * */ 46 | typedef struct ccard_algo_s { 47 | /** Allocate algorithm ctx with optional raw data */ 48 | void *(*raw_init) (const void *buf, uint32_t len_or_hint, 49 | uint8_t opt); 50 | /** Allocate algorithm ctx with optional external data */ 51 | void *(*init) (const void *buf, uint32_t len_or_hint, 52 | uint8_t opt); 53 | /** Get cardinality from algorithm ctx */ 54 | int64_t (*card) (void *ctx); 55 | /** Offer a new item to be counted */ 56 | int (*offer) (void *ctx, const void *buf, 57 | uint32_t len); 58 | /** Reset count 
to zero */ 59 | int (*reset) (void *ctx); 60 | /** Get raw bytes of the algorithm state */ 61 | int (*get_raw_bytes) (void *ctx, void *buf, 62 | uint32_t *len); 63 | /** Get serialized bytes of the algorithm state */ 64 | int (*get_bytes) (void *ctx, void *buf, 65 | uint32_t *len); 66 | /** Merge several algorithm ctx and combine their counts */ 67 | int (*merge) (void *ctx, void *tbm, ...); 68 | /** Merge several raw bytes and combine their counts */ 69 | int (*merge_raw_bytes) (void *ctx, const void *buf, 70 | uint32_t len, ...); 71 | /** Merge several serialized bytes and combine their counts */ 72 | int (*merge_bytes) (void *ctx, const void *buf, 73 | uint32_t len, ...); 74 | /** Deallocate algorithm ctx */ 75 | int (*fini) (void *ctx); 76 | /** Get error code from algorithm ctx */ 77 | int (*errnum) (void *ctx); 78 | /** Convert error code to human-friendly messages */ 79 | const char *(*errstr) (int errn); 80 | } ccard_algo_t; 81 | 82 | #ifdef __cplusplus 83 | } 84 | #endif 85 | 86 | #endif 87 | 88 | /* vi:ft=c ts=4 sw=4 fdm=marker et 89 | * */ 90 | 91 | -------------------------------------------------------------------------------- /include/hyperloglog_counting.h: -------------------------------------------------------------------------------- 1 | #ifndef HYPERLOGLOG_COUNTING_H__ 2 | #define HYPERLOGLOG_COUNTING_H__ 3 | 4 | #include "ccard_common.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | /** 11 | * Opaque hyperloglog counting context type 12 | * */ 13 | typedef struct hll_cnt_ctx_s hll_cnt_ctx_t; 14 | 15 | /** 16 | * Initialize hyperloglog counting context with optional raw bitmap. 17 | * 18 | * @param[in] buf Pointer to the raw bitmap. NULL if there's none. 19 | * @param[in] len_or_k The length of the bitmap if buf is not NULL; 20 | * otherwise it's the base-2 logarithm of the bitmap length. 21 | * @param[in] hf Hash function that be applied to elements. 
22 | * 23 | * @retval not-NULL An initialized context to be used with the rest of 24 | * methods. 25 | * @retval NULL If error occured. 26 | * 27 | * @see hll_cnt_fini, hll_cnt_init 28 | * */ 29 | hll_cnt_ctx_t *hll_cnt_raw_init(const void *obuf, uint32_t len_or_k, 30 | uint8_t hf); 31 | 32 | /** 33 | * Initialize hyperloglog counting context with optional serialized bitmap. 34 | * 35 | * @param[in] obuf Pointer to the serialized bitmap. NULL if there's none. 36 | * @param[in] len_or_k The length of the bitmap if obuf is not NULL; 37 | * otherwise it's the base-2 logarithm of the bitmap length. 38 | * @param[in] hf Hash function to be applied to elements. 39 | * 40 | * @retval not-NULL An initialized context to be used with the rest of 41 | * the methods. 42 | * @retval NULL If an error occurred. 43 | * 44 | * @see hll_cnt_fini, hll_cnt_raw_init 45 | * */ 46 | hll_cnt_ctx_t *hll_cnt_init(const void *obuf, uint32_t len_or_k, 47 | uint8_t hf); 48 | 49 | /** 50 | * Retrieve the cardinality calculated from the bitmap in the context using 51 | * Hyperloglog Counting. 52 | * 53 | * @param[in] ctx Pointer to the context. 54 | * 55 | * @retval >=0 Calculated cardinality based on the bitmap in the context on 56 | * success. 57 | * @retval -1 If an error occurred. 58 | * 59 | * @see hll_cnt_offer, hll_cnt_reset 60 | * */ 61 | int64_t hll_cnt_card(hll_cnt_ctx_t *ctx); 62 | 63 | /** 64 | * Offer an object to be counted as a distinct element. 65 | * 66 | * @param[in,out] ctx Pointer to the context. 67 | * @param[in] buf Pointer to the buffer storing the object. 68 | * @param[in] len The length of the buffer. 69 | * 70 | * @retval 1 If the object affected the final count. 71 | * @retval 0 If the final count isn't affected by the object. 72 | * @retval -1 If an error occurred. 73 | * 74 | * @see hll_cnt_card, hll_cnt_reset 75 | * */ 76 | int hll_cnt_offer(hll_cnt_ctx_t *ctx, const void *buf, 77 | uint32_t len); 78 | 79 | /** 80 | * Reset bitmap in the context, effectively clear cardinality to zero.
81 | * 82 | * @param[in,out] ctx Pointer to the context. 83 | * 84 | * @retval 0 If success. 85 | * @retval -1 If error occured. 86 | * 87 | * @see hll_cnt_card, hll_cnt_offer 88 | * */ 89 | int hll_cnt_reset(hll_cnt_ctx_t *ctx); 90 | 91 | /** 92 | * Get the raw bitmap or the bitmap length from the context. 93 | * 94 | * @param[in] ctx Pointer to the context. 95 | * @param[out] buf Pointer to the buffer receiving the bitmap. NULL if only 96 | * the bitmap length is needed. 97 | * @param[out] len Pointer to the variable receiving the bitmap length. 98 | * 99 | * @retval 0 On success. 100 | * @retval -1 If an error occurred. 101 | * 102 | * @see hll_cnt_merge, hll_cnt_merge_raw_bytes, hll_cnt_get_bytes 103 | * */ 104 | int hll_cnt_get_raw_bytes(hll_cnt_ctx_t *ctx, void *buf, 105 | uint32_t *len); 106 | 107 | /** 108 | * Get the serialized bitmap or the bitmap length from the context. 109 | * 110 | * @param[in] ctx Pointer to the context. 111 | * @param[out] buf Pointer to the buffer receiving the bitmap. NULL if only 112 | * the bitmap length is needed. 113 | * @param[out] len Pointer to the variable receiving the bitmap length. 114 | * 115 | * @retval 0 On success. 116 | * @retval -1 If an error occurred. 117 | * 118 | * @see hll_cnt_merge, hll_cnt_merge_bytes, hll_cnt_get_raw_bytes 119 | * */ 120 | int hll_cnt_get_bytes(hll_cnt_ctx_t *ctx, void *buf, 121 | uint32_t *len); 122 | 123 | /** 124 | * Merge several hyperloglog counting context into the current one, 125 | * effectively combined all distinct countings. 126 | * 127 | * Usage: 128 | * @code{c} 129 | * if(hll_cnt_merge(ctx, ctx_1, ctx_2, ..., ctx_n, NULL)) { 130 | * printf("Failed to merge contexts: %s", 131 | * hll_cnt_errstr(hll_cnt_errnum(ctx))); 132 | * } 133 | * @endcode 134 | * 135 | * @note All context to be merged must be of the same bitmap length, 136 | * otherwise error will be returned! 137 | * 138 | * @param[in,out] ctx Pointer to the context merging to. 139 | * @param[in] tbm Pointer to the first context to be merged.
The rest 140 | * contexts will be listed sequentially with a ending NULL. 141 | * 142 | * @retval 0 if all were merged successfully. 143 | * @retval -1 if error occured. 144 | * 145 | * @see hll_cnt_merge_bytes, hll_cnt_get_bytes 146 | * */ 147 | int hll_cnt_merge(hll_cnt_ctx_t *ctx, hll_cnt_ctx_t *tbm, 148 | ...); 149 | 150 | /** 151 | * Merge several raw hyperloglog counting bitmaps into the current context, 152 | * effectively combining all distinct counts. 153 | * 154 | * Usage: 155 | * @code{c} 156 | * if(hll_cnt_merge_raw_bytes(ctx, buf_1, len_1, buf_2, len_2, 157 | * ..., buf_n, len_n, NULL)) { 158 | * printf("Failed to merge bitmaps: %s", 159 | * hll_cnt_errstr(hll_cnt_errnum(ctx))); 160 | * } 161 | * @endcode 162 | * 163 | * @note All bitmaps to be merged must be of the same length as the bitmap 164 | * in the current context, otherwise an error will be returned! 165 | * 166 | * @param[in,out] ctx Pointer to the context to merge into. 167 | * @param[in] buf Pointer to the first bitmap to be merged. 168 | * @param[in] len Length of the first bitmap to be merged. The remaining buf/len 169 | * pairs are listed sequentially, terminated by NULL. 170 | * 171 | * @retval 0 if all were merged successfully. 172 | * @retval -1 if an error occurred. 173 | * 174 | * @see hll_cnt_merge, hll_cnt_get_raw_bytes, hll_cnt_merge_bytes 175 | * */ 176 | int hll_cnt_merge_raw_bytes(hll_cnt_ctx_t *ctx, 177 | const void *buf, uint32_t len, 178 | ...); 179 | 180 | /** 181 | * Merge several hyperloglog counting bitmap into the current context, 182 | * effectively combined all distinct countings. 183 | * 184 | * Usage: 185 | * @code{c} 186 | * if(hll_cnt_merge_bytes(ctx, buf_1, len_1, buf_2, len_2, 187 | * ..., buf_n, len_n, NULL)) { 188 | * printf("Failed to merge bitmaps: %s", 189 | * hll_cnt_errstr(hll_cnt_errnum(ctx))); 190 | * } 191 | * @endcode 192 | * 193 | * @note All bitmap to be merged must be of the same length with the bitmap 194 | * in current context, otherwise error will be returned!
195 | * 196 | * @param[in,out] ctx Pointer to the context merging to. 197 | * @param[in] buf Pointer to the first bitmap to be merged. 198 | * @param[in] len Length of the first bitmap to be merged. The rest buf/len 199 | * pairs will be listed sequentially with a ending NULL. 200 | * 201 | * @retval 0 if all were merged successfully. 202 | * @retval -1 if error occured. 203 | * 204 | * @see hll_cnt_merge, hll_cnt_get_bytes, hll_cnt_merge_raw_bytes 205 | * */ 206 | int hll_cnt_merge_bytes(hll_cnt_ctx_t *ctx, 207 | const void *buf, uint32_t len, 208 | ...); 209 | 210 | /** 211 | * Finalize and release the resources of the given hyperloglog counting 212 | * context. 213 | * 214 | * @param[in] ctx Pointer to the context to release. 215 | * 216 | * @retval 0 if finalized successfully. 217 | * @retval -1 if an error occurred. 218 | * 219 | * @see hll_cnt_init, hll_cnt_raw_init 220 | * */ 221 | int hll_cnt_fini(hll_cnt_ctx_t *ctx); 222 | 223 | /** 224 | * Get the error status of the given context. 225 | * 226 | * @param[in] ctx Pointer to the context. 227 | * 228 | * @retval <=0 Error number in the context. 229 | * 230 | * @see hll_cnt_errstr 231 | * */ 232 | int hll_cnt_errnum(hll_cnt_ctx_t *ctx); 233 | 234 | /** 235 | * Convert error status to human-friendly message. 236 | * 237 | * @param[in] errn Error number returned by hll_cnt_errnum. 238 | * 239 | * @retval not-NULL Corresponding message string. 240 | * @retval NULL Invalid error number.
241 | * 242 | * @see hll_cnt_errnum 243 | * */ 244 | const char *hll_cnt_errstr(int errn); 245 | 246 | /** 247 | * Hyperloglog counting algorithm definition (pointer to its ccard_algo_t table) 248 | * */ 249 | extern ccard_algo_t *hll_algo; 250 | 251 | #ifdef __cplusplus 252 | } 253 | #endif 254 | 255 | #endif 256 | 257 | /* vi:ft=c ts=4 sw=4 fdm=marker et 258 | * */ 259 | 260 | -------------------------------------------------------------------------------- /include/hyperloglogplus_counting.h: -------------------------------------------------------------------------------- 1 | #ifndef HYPERLOGLOGPLUS_COUNTING_H__ 2 | #define HYPERLOGLOGPLUS_COUNTING_H__ 3 | 4 | #include "ccard_common.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | /** 11 | * Opaque hyperloglogplus counting context type 12 | * */ 13 | typedef struct hllp_cnt_ctx_s hllp_cnt_ctx_t; 14 | 15 | /** 16 | * Initialize hyperloglogplus counting context with optional raw bitmap. 17 | * 18 | * @param[in] obuf Pointer to the raw bitmap. NULL if there's none. 19 | * @param[in] len_or_k The length of the bitmap if obuf is not NULL; 20 | * otherwise it's the base-2 logarithm of the bitmap length. 21 | * 22 | * @retval not-NULL An initialized context to be used with the rest of 23 | * the methods. 24 | * @retval NULL If an error occurred. 25 | * 26 | * @see hllp_cnt_fini, hllp_cnt_init 27 | * */ 28 | hllp_cnt_ctx_t *hllp_cnt_raw_init(const void *obuf, uint32_t len_or_k); 29 | 30 | /** 31 | * Initialize hyperloglogplus counting context with optional serialized bitmap. 32 | * 33 | * @param[in] buf Pointer to the serialized bitmap. NULL if there's none. 34 | * @param[in] len_or_k The length of the bitmap if buf is not NULL; 35 | * otherwise it's the base-2 logarithm of the bitmap length. 36 | * 37 | * @retval not-NULL An initialized context to be used with the rest of 38 | * methods. 39 | * @retval NULL If error occured.
40 | * 41 | * @see hllp_cnt_fini, hllp_cnt_raw_init 42 | * */ 43 | hllp_cnt_ctx_t *hllp_cnt_init(const void *obuf, uint32_t len_or_k); 44 | 45 | /** 46 | * Retrieve the cardinality calculated from the bitmap in the context using 47 | * Hyperloglogplus Counting. 48 | * 49 | * @param[in] ctx Pointer to the context. 50 | * 51 | * @retval >=0 Calculated cardinality based on the bitmap in the context on 52 | * success. 53 | * @retval -1 If an error occurred. 54 | * 55 | * @see hllp_cnt_offer, hllp_cnt_reset 56 | * */ 57 | int64_t hllp_cnt_card(hllp_cnt_ctx_t *ctx); 58 | 59 | /** 60 | * Offer an object to be counted as a distinct element. 61 | * 62 | * @param[in,out] ctx Pointer to the context. 63 | * @param[in] buf Pointer to the buffer storing the object. 64 | * @param[in] len The length of the buffer. 65 | * 66 | * @retval 1 If the object affected the final count. 67 | * @retval 0 If the final count isn't affected by the object. 68 | * @retval -1 If an error occurred. 69 | * 70 | * @see hllp_cnt_card, hllp_cnt_reset 71 | * */ 72 | int hllp_cnt_offer(hllp_cnt_ctx_t *ctx, const void *buf, 73 | uint32_t len); 74 | 75 | /** 76 | * Reset the bitmap in the context, effectively clearing the cardinality to zero. 77 | * 78 | * @param[in,out] ctx Pointer to the context. 79 | * 80 | * @retval 0 On success. 81 | * @retval -1 If an error occurred. 82 | * 83 | * @see hllp_cnt_card, hllp_cnt_offer 84 | * */ 85 | int hllp_cnt_reset(hllp_cnt_ctx_t *ctx); 86 | 87 | /** 88 | * Get the raw bitmap or bitmap length from context. 89 | * 90 | * @param[in] ctx Pointer to the context. 91 | * @param[out] buf Pointer to buffer storing returning bitmap. NULL if only 92 | * bitmap length is needed. 93 | * @param[out] len Pointer to variable storing returning bitmap length. 94 | * 95 | * @retval 0 If success. 96 | * @retval -1 If error occured.
97 | * 98 | * @see hllp_cnt_merge, hllp_cnt_merge_raw_bytes, hllp_cnt_get_bytes 99 | * */ 100 | int hllp_cnt_get_raw_bytes(hllp_cnt_ctx_t *ctx, void *buf, 101 | uint32_t *len); 102 | 103 | /** 104 | * Get the serialized bitmap or the bitmap length from the context. 105 | * 106 | * @param[in] ctx Pointer to the context. 107 | * @param[out] buf Pointer to the buffer receiving the bitmap. NULL if only 108 | * the bitmap length is needed. 109 | * @param[out] len Pointer to the variable receiving the bitmap length. 110 | * 111 | * @retval 0 On success. 112 | * @retval -1 If an error occurred. 113 | * 114 | * @see hllp_cnt_merge, hllp_cnt_merge_bytes, hllp_cnt_get_raw_bytes 115 | * */ 116 | int hllp_cnt_get_bytes(hllp_cnt_ctx_t *ctx, void *buf, 117 | uint32_t *len); 118 | 119 | /** 120 | * Merge several hyperloglogplus counting contexts into the current one, 121 | * effectively combining all distinct counts. 122 | * 123 | * Usage: 124 | * @code{c} 125 | * if(hllp_cnt_merge(ctx, ctx_1, ctx_2, ..., ctx_n, NULL)) { 126 | * printf("Failed to merge contexts: %s", 127 | * hllp_cnt_errstr(hllp_cnt_errnum(ctx))); 128 | * } 129 | * @endcode 130 | * 131 | * @note All contexts to be merged must have the same bitmap length, 132 | * otherwise an error will be returned! 133 | * 134 | * @param[in,out] ctx Pointer to the context to merge into. 135 | * @param[in] tbm Pointer to the first context to be merged. The remaining 136 | * contexts are listed sequentially, terminated by NULL. 137 | * 138 | * @retval 0 if all were merged successfully. 139 | * @retval -1 if an error occurred. 140 | * 141 | * @see hllp_cnt_merge_bytes, hllp_cnt_get_bytes 142 | * */ 143 | int hllp_cnt_merge(hllp_cnt_ctx_t *ctx, hllp_cnt_ctx_t *tbm, 144 | ...); 145 | 146 | /** 147 | * Merge several hyperloglogplus counting bitmap into the current context, 148 | * effectively combined all distinct countings.
149 | * 150 | * Usage: 151 | * @code{c} 152 | * if(hllp_cnt_merge_raw_bytes(ctx, buf_1, len_1, buf_2, len_2, 153 | * ..., buf_n, len_n, NULL)) { 154 | * printf("Failed to merge bitmaps: %s", 155 | * hllp_cnt_errstr(hllp_cnt_errnum(ctx))); 156 | * } 157 | * @endcode 158 | * 159 | * @note All bitmap to be merged must be of the same length with the bitmap 160 | * in current context, otherwise error will be returned! 161 | * 162 | * @param[in,out] ctx Pointer to the context merging to. 163 | * @param[in] buf Pointer to the first bitmap to be merged. 164 | * @param[in] len Length of the first bitmap to be merged. The rest buf/len 165 | * pairs will be listed sequentially with a ending NULL. 166 | * 167 | * @retval 0 if all were merged successfully. 168 | * @retval -1 if error occured. 169 | * 170 | * @see hllp_cnt_merge, hllp_cnt_get_bytes, hllp_cnt_merge_bytes 171 | * */ 172 | int hllp_cnt_merge_raw_bytes(hllp_cnt_ctx_t *ctx, 173 | const void *buf, uint32_t len, 174 | ...); 175 | 176 | /** 177 | * Merge several serialized hyperloglogplus counting bitmaps into the current context, 178 | * effectively combining all distinct counts. 179 | * 180 | * Usage: 181 | * @code{c} 182 | * if(hllp_cnt_merge_bytes(ctx, buf_1, len_1, buf_2, len_2, 183 | * ..., buf_n, len_n, NULL)) { 184 | * printf("Failed to merge bitmaps: %s", 185 | * hllp_cnt_errstr(hllp_cnt_errnum(ctx))); 186 | * } 187 | * @endcode 188 | * 189 | * @note All bitmaps to be merged must be of the same length as the bitmap 190 | * in the current context, otherwise an error will be returned! 191 | * 192 | * @param[in,out] ctx Pointer to the context to merge into. 193 | * @param[in] buf Pointer to the first bitmap to be merged. 194 | * @param[in] len Length of the first bitmap to be merged. The remaining buf/len 195 | * pairs are listed sequentially, terminated by NULL. 196 | * 197 | * @retval 0 if all were merged successfully. 198 | * @retval -1 if an error occurred.
199 | * 200 | * @see hllp_cnt_merge, hllp_cnt_get_bytes, hllp_cnt_merge_raw_bytes 201 | * */ 202 | int hllp_cnt_merge_bytes(hllp_cnt_ctx_t *ctx, 203 | const void *buf, uint32_t len, 204 | ...); 205 | 206 | /** 207 | * Finalize and release the resources of the given hyperloglogplus counting 208 | * context. 209 | * 210 | * @param[in] ctx Pointer to the context to release. 211 | * 212 | * @retval 0 if finalized successfully. 213 | * @retval -1 if an error occurred. 214 | * 215 | * @see hllp_cnt_init, hllp_cnt_raw_init 216 | * */ 217 | int hllp_cnt_fini(hllp_cnt_ctx_t *ctx); 218 | 219 | /** 220 | * Get the error status of the given context. 221 | * 222 | * @param[in] ctx Pointer to the context. 223 | * 224 | * @retval <=0 Error number in the context. 225 | * 226 | * @see hllp_cnt_errstr 227 | * */ 228 | int hllp_cnt_errnum(hllp_cnt_ctx_t *ctx); 229 | 230 | /** 231 | * Convert an error status to a human-friendly message. 232 | * 233 | * @param[in] errn Error number returned by hllp_cnt_errnum. 234 | * 235 | * @retval not-NULL Corresponding message string. 236 | * @retval NULL Invalid error number. 237 | * 238 | * @see hllp_cnt_errnum 239 | * */ 240 | const char *hllp_cnt_errstr(int errn); 241 | 242 | /** 243 | * Hyperloglogplus counting algorithm definition (pointer to its ccard_algo_t table) 244 | * */ 245 | extern ccard_algo_t *hllp_algo; 246 | 247 | #ifdef __cplusplus 248 | } 249 | #endif 250 | 251 | #endif 252 | 253 | /* vi:ft=c ts=4 sw=4 fdm=marker et 254 | * */ 255 | 256 | -------------------------------------------------------------------------------- /include/linear_counting.h: -------------------------------------------------------------------------------- 1 | #ifndef LINEAR_COUNTING_H__ 2 | #define LINEAR_COUNTING_H__ 3 | 4 | #include "ccard_common.h" 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | /** 11 | * Opaque linear counting context type 12 | * */ 13 | typedef struct lnr_cnt_ctx_s lnr_cnt_ctx_t; 14 | 15 | /** 16 | * Initialize linear counting context with optional raw bitmap.
17 | * 18 | * @param[in] buf Pointer to the raw bitmap (no header). NULL if there's 19 | * none. 20 | * @param[in] len_or_k The length of the bitmap if buf is not NULL; 21 | * otherwise it's the base-2 logarithm of the bitmap length (no header). 22 | * @param[in] hf Hash function that be applied to elements. 23 | * 24 | * @retval not-NULL An initialized context to be used with the rest of 25 | * methods. 26 | * @retval NULL If error occured. 27 | * 28 | * @see lnr_cnt_fini 29 | * */ 30 | lnr_cnt_ctx_t *lnr_cnt_raw_init(const void *obuf, uint32_t len_or_k, 31 | uint8_t hf); 32 | 33 | /** 34 | * Initialize linear counting context with optional serialized bitmap. 35 | * 36 | * @param[in] obuf Pointer to the serialized bitmap (with 3 bytes header). 37 | * NULL if there's none. 38 | * @param[in] len_or_k The length of the bitmap if obuf is not NULL; 39 | * otherwise it's the base-2 logarithm of the bitmap length (excluding the 3 40 | * bytes header). 41 | * @param[in] hf Hash function to be applied to elements. 42 | * 43 | * @retval not-NULL An initialized context to be used with the rest of 44 | * the methods. 45 | * @retval NULL If an error occurred. 46 | * 47 | * @see lnr_cnt_fini, lnr_cnt_raw_init 48 | * */ 49 | lnr_cnt_ctx_t *lnr_cnt_init(const void *obuf, uint32_t len_or_k, 50 | uint8_t hf); 51 | 52 | /** 53 | * Retrieve the cardinality calculated from the bitmap in the context using 54 | * Linear Counting. 55 | * 56 | * @param[in] ctx Pointer to the context. 57 | * 58 | * @retval >=0 Calculated cardinality based on the bitmap in the context on 59 | * success. 60 | * @retval -1 If an error occurred. 61 | * 62 | * @see lnr_cnt_offer, lnr_cnt_reset 63 | * */ 64 | int64_t lnr_cnt_card(lnr_cnt_ctx_t *ctx); 65 | 66 | /** 67 | * Offer a object to be distinct counted. 68 | * 69 | * @param[in,out] ctx Pointer to the context. 70 | * @param[in] buf Pointer to the buffer storing object. 71 | * @param[in] len The length of the buffer. 72 | * 73 | * @retval 1 If the object affected final counting.
74 | * @retval 0 If final counting isn't affected by the object. 75 | * @retval -1 If error occured. 76 | * 77 | * @see lnr_cnt_card, lnr_cnt_reset 78 | * */ 79 | int lnr_cnt_offer(lnr_cnt_ctx_t *ctx, const void *buf, 80 | uint32_t len); 81 | 82 | /** 83 | * Reset the bitmap in the context, effectively clearing the cardinality to zero. 84 | * 85 | * @param[in,out] ctx Pointer to the context. 86 | * 87 | * @retval 0 On success. 88 | * @retval -1 If an error occurred. 89 | * 90 | * @see lnr_cnt_card, lnr_cnt_offer 91 | * */ 92 | int lnr_cnt_reset(lnr_cnt_ctx_t *ctx); 93 | 94 | /** 95 | * Get the raw bitmap or the bitmap length from the context. 96 | * 97 | * @param[in] ctx Pointer to the context. 98 | * @param[out] buf Pointer to the buffer receiving the bitmap. NULL if only 99 | * the bitmap length is needed. 100 | * @param[out] len Pointer to the variable receiving the bitmap length. 101 | * 102 | * @retval 0 On success. 103 | * @retval -1 If an error occurred. 104 | * 105 | * @see lnr_cnt_merge, lnr_cnt_merge_raw_bytes, lnr_cnt_get_bytes 106 | * */ 107 | int lnr_cnt_get_raw_bytes(lnr_cnt_ctx_t *ctx, void *buf, 108 | uint32_t *len); 109 | 110 | /** 111 | * Get the serialized bitmap or the bitmap length from the context. 112 | * 113 | * @param[in] ctx Pointer to the context. 114 | * @param[out] buf Pointer to the buffer receiving the bitmap. NULL if only 115 | * the bitmap length is needed. 116 | * @param[out] len Pointer to the variable receiving the bitmap length. 117 | * 118 | * @retval 0 On success. 119 | * @retval -1 If an error occurred. 120 | * 121 | * @see lnr_cnt_merge, lnr_cnt_merge_bytes, lnr_cnt_get_raw_bytes 122 | * */ 123 | int lnr_cnt_get_bytes(lnr_cnt_ctx_t *ctx, void *buf, 124 | uint32_t *len); 125 | 126 | /** 127 | * Merge several linear counting context into the current one, 128 | * effectively combined all distinct countings.
129 | * 130 | * Usage: 131 | * @code{c} 132 | * if(lnr_cnt_merge(ctx, ctx_1, ctx_2, ..., ctx_n, NULL)) { 133 | * printf("Failed to merge contexts: %s", 134 | * lnr_cnt_errstr(lnr_cnt_errnum(ctx))); 135 | * } 136 | * @endcode 137 | * 138 | * @note All context to be merged must be of the same bitmap length, 139 | * otherwise error will be returned! 140 | * 141 | * @param[in,out] ctx Pointer to the context merging to. 142 | * @param[in] tbm Pointer to the first context to be merged. The rest 143 | * contexts will be listed sequentially with a ending NULL. 144 | * 145 | * @retval 0 if all were merged successfully. 146 | * @retval -1 if error occured. 147 | * 148 | * @see lnr_cnt_merge_bytes, lnr_cnt_get_bytes 149 | * */ 150 | int lnr_cnt_merge(lnr_cnt_ctx_t *ctx, lnr_cnt_ctx_t *tbm, 151 | ...); 152 | 153 | /** 154 | * Merge several raw linear counting bitmaps into the current context, 155 | * effectively combining all distinct counts. 156 | * 157 | * Usage: 158 | * @code{c} 159 | * if(lnr_cnt_merge_raw_bytes(ctx, buf_1, len_1, buf_2, len_2, 160 | * ..., buf_n, len_n, NULL)) { 161 | * printf("Failed to merge bitmaps: %s", 162 | * lnr_cnt_errstr(lnr_cnt_errnum(ctx))); 163 | * } 164 | * @endcode 165 | * 166 | * @note All bitmaps to be merged must be of the same length as the bitmap 167 | * in the current context, otherwise an error will be returned! 168 | * 169 | * @param[in,out] ctx Pointer to the context to merge into. 170 | * @param[in] buf Pointer to the first bitmap to be merged. 171 | * @param[in] len Length of the first bitmap to be merged. The remaining buf/len 172 | * pairs are listed sequentially, terminated by NULL. 173 | * 174 | * @retval 0 if all were merged successfully. 175 | * @retval -1 if an error occurred.
176 | * 177 | * @see lnr_cnt_merge, lnr_cnt_get_raw_bytes, lnr_cnt_merge_bytes 178 | * */ 179 | int lnr_cnt_merge_raw_bytes(lnr_cnt_ctx_t *ctx, 180 | const void *buf, uint32_t len, 181 | ...); 182 | 183 | /** 184 | * Merge several serialized linear counting bitmaps into the current context, 185 | * effectively combining all distinct counts. 186 | * 187 | * Usage: 188 | * @code{c} 189 | * if(lnr_cnt_merge_bytes(ctx, buf_1, len_1, buf_2, len_2, 190 | * ..., buf_n, len_n, NULL)) { 191 | * printf("Failed to merge bitmaps: %s", 192 | * lnr_cnt_errstr(lnr_cnt_errnum(ctx))); 193 | * } 194 | * @endcode 195 | * 196 | * @note All bitmaps to be merged must be of the same length as the bitmap 197 | * in the current context, otherwise an error will be returned! 198 | * 199 | * @param[in,out] ctx Pointer to the context to merge into. 200 | * @param[in] buf Pointer to the first bitmap to be merged. 201 | * @param[in] len Length of the first bitmap to be merged. The remaining buf/len 202 | * pairs are listed sequentially, terminated by NULL. 203 | * 204 | * @retval 0 if all were merged successfully. 205 | * @retval -1 if an error occurred. 206 | * 207 | * @see lnr_cnt_merge, lnr_cnt_get_bytes, lnr_cnt_merge_raw_bytes 208 | * */ 209 | int lnr_cnt_merge_bytes(lnr_cnt_ctx_t *ctx, 210 | const void *buf, uint32_t len, 211 | ...); 212 | 213 | /** 214 | * Finalize and release the resources of the given linear counting context. 215 | * 216 | * @param[in] ctx Pointer to the context to release. 217 | * 218 | * @retval 0 if finalized successfully. 219 | * @retval -1 if an error occurred. 220 | * 221 | * @see lnr_cnt_init, lnr_cnt_raw_init 222 | * */ 223 | int lnr_cnt_fini(lnr_cnt_ctx_t *ctx); 224 | 225 | /** 226 | * Get the error status of the given context. 227 | * 228 | * @param[in] ctx Pointer to the context. 229 | * 230 | * @retval <=0 Error number in the context. 231 | * 232 | * @see lnr_cnt_errstr 233 | * */ 234 | int lnr_cnt_errnum(lnr_cnt_ctx_t *ctx); 235 | 236 | /** 237 | * Convert error status to human-friendly message.
238 | * 239 | * @param[in] errn Error number returned by lnr_cnt_errnum. 240 | * 241 | * @retval not-NULL Corresponding message string. 242 | * @retval NULL Invalid error number. 243 | * 244 | * @see lnr_cnt_errnum 245 | * */ 246 | const char *lnr_cnt_errstr(int errn); 247 | 248 | /** 249 | * Linear counting algorithm definition 250 | * */ 251 | extern ccard_algo_t *lnr_algo; 252 | 253 | #ifdef __cplusplus 254 | } 255 | #endif 256 | 257 | #endif 258 | 259 | /* vi:ft=c ts=4 sw=4 fdm=marker et 260 | * */ 261 | 262 | -------------------------------------------------------------------------------- /include/lookup3hash.h: -------------------------------------------------------------------------------- 1 | #ifndef LOOKUP3HASH_H__ 2 | #define LOOKUP3HASH_H__ 3 | 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | /** 11 | * A implementation of hashword from lookup3.c by Bob Jenkins 12 | * (original source). 13 | * 14 | * @param k the key to hash 15 | * @param offset offset of the start of the key 16 | * @param length length of the key 17 | * @param initval initial value to fold into the hash 18 | * @return the 32 bit hash code 19 | * */ 20 | uint32_t lookup3(const uint32_t *k, uint32_t offset, 21 | uint32_t length, uint32_t initval); 22 | 23 | /** 24 | * Identical to lookup3, except initval is biased by -(length<<2). 25 | * This is equivalent to leaving out the length factor in the initial state. 26 | * {@code lookup3ycs(k,offset,length,initval) == lookup3(k,offset,length,initval-(length<<2))} 27 | * and 28 | * {@code lookup3ycs(k,offset,length,initval+(length<<2)) == lookup3(k,offset,length,initval)} 29 | * */ 30 | uint32_t lookup3ycs(const uint32_t *k, uint32_t offset, 31 | uint32_t length, uint32_t initval); 32 | 33 | /** 34 | *

The hash value of a character sequence is defined to be the hash of 35 | * its Unicode code points, according to {@link #lookup3ycs(uint32_t *k, uint32_t offset, uint32_t length, uint32_t initval)} 36 | *

37 | *

If you know the number of code points in the {@code CharSequence}, you can 38 | * generate the same hash as the original lookup3 39 | * via {@code lookup3ycs(s, start, end, initval+(numCodePoints<<2))} 40 | * */ 41 | /* NOTE(review): the description above speaks of a CharSequence and code points while s is a const char *; presumably start/end are offsets into s -- confirm against the implementation. */ uint32_t lookup3ycs_2(const char *s, uint32_t start, 42 | uint32_t end, uint32_t initval); 43 | 44 | /** 45 | *

This is the 64 bit version of lookup3ycs, corresponding to Bob Jenkins' 46 | * lookup3 hashlittle2 with initval biased by -(numCodePoints<<2). It is equivalent 47 | * to lookup3ycs in that if the high bits of initval==0, then the low bits of the 48 | * result will be the same as lookup3ycs. 49 | *

50 | * */ 51 | uint64_t lookup3ycs64(const char *s, uint32_t start, 52 | uint32_t end, uint64_t initval); 53 | 54 | /* NOTE(review): undocumented; presumably hashes the whole string s with default start/end/initval -- confirm against the implementation. */ uint64_t lookup3ycs64_2(const char *s); 55 | 56 | #ifdef __cplusplus 57 | } 58 | #endif 59 | 60 | #endif 61 | 62 | /* vi:ft=c ts=4 sw=4 fdm=marker et 63 | * */ 64 | 65 | -------------------------------------------------------------------------------- /include/murmurhash.h: -------------------------------------------------------------------------------- 1 | #ifndef MURMURHASH_H__ 2 | #define MURMURHASH_H__ 3 | 4 | #include <stdint.h> 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | /** 11 | * Generate a 32bit hash code of the given data using the Murmurhash algorithm. 12 | * 13 | * @param buf Pointer to the data buffer 14 | * @param len Data length 15 | * @param seed Initial hash seed used to salt the result 16 | * 17 | * @return Calculated hash code. 18 | * */ 19 | uint32_t murmurhash(void *buf, uint32_t len, uint32_t seed); 20 | 21 | /** 22 | * Generate a 32bit hash code of the given 64bit integer using the Murmurhash 23 | * algorithm. 24 | * 25 | * @param data The 64bit integer to be hashed 26 | * 27 | * @return Calculated hash code. 28 | * */ 29 | uint32_t murmurhash_long(uint64_t data); 30 | 31 | /** 32 | * Generate a 64bit hash code of the given data using the Murmurhash algorithm. 33 | * 34 | * @param buf Pointer to the data buffer 35 | * @param len Data length 36 | * @param seed Initial hash seed used to salt the result 37 | * 38 | * @return Calculated hash code. 39 | * */ 40 | uint64_t murmurhash64(void *buf, uint32_t len, uint32_t seed); 41 | 42 | /** 43 | * Generate a 64bit hash code of the given data using the Murmurhash algorithm with the default seed. 44 | * 45 | * @param buf Pointer to the data buffer 46 | * @param len Data length 47 | * 48 | * @return Calculated hash code.
49 | * */ 50 | uint64_t murmurhash64_no_seed(void *buf, uint32_t len); 51 | 52 | #ifdef __cplusplus 53 | } 54 | #endif 55 | 56 | #endif 57 | 58 | /* vi:ft=c ts=4 sw=4 fdm=marker et 59 | * */ 60 | 61 | -------------------------------------------------------------------------------- /include/register_set.h: -------------------------------------------------------------------------------- 1 | #ifndef REGISTERSET_H__ 2 | #define REGISTERSET_H__ 3 | 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | typedef struct reg_set_s { 11 | uint32_t count; 12 | uint32_t size; 13 | uint32_t M[1]; 14 | } reg_set_t; 15 | 16 | /** 17 | * Initialize a new register set. 18 | * 19 | * @param[in] count Logical elements number of register set. 20 | * @param[in] values Initial values. NULL if there is no initial values. 21 | * @param[in] len Length of initial values. 22 | * 23 | * @retval not-NULL An initialized register set to be used with the rest of 24 | * methods. 25 | * @retval NULL If error occured. 26 | * */ 27 | reg_set_t *rs_init(uint32_t count, uint32_t *values, 28 | uint32_t len); 29 | 30 | /** 31 | * Set value. 32 | * 33 | * @param[in] rs Register set. 34 | * @param[in] pos Logical element number. 35 | * @param[in] value Value to be set. 36 | * 37 | * @retval 0 If success. 38 | * @retval -1 If error occured. 39 | * */ 40 | int rs_set(reg_set_t *rs, uint32_t pos, uint32_t value); 41 | 42 | /** 43 | * Get value. 44 | * 45 | * @param[in] rs Register set. 46 | * @param[in] pos Logical element number. 47 | * @param[out] value Store output value. 48 | * 49 | * @retval 0 If success. 50 | * @retval -1 If error occured. 51 | * */ 52 | int rs_get(reg_set_t *rs, uint32_t pos, uint32_t *value); 53 | 54 | /** 55 | * Convert register set to bitmap. 56 | * 57 | * @param[in] rs Register set. 58 | * @param[out] bits Buffer that stores bitmap. 59 | * @param[in|out] len Buffer size and bitmap length. 60 | * 61 | * @retval 0 If success. 62 | * @retval -1 If error occured. 
63 | * */ 64 | int rs_bits(reg_set_t *rs, uint32_t *bits, 65 | uint32_t *len); 66 | 67 | /** 68 | * Destory register set and release resource. 69 | * 70 | * @param[in] rs Register set. 71 | * 72 | * @retval 0 If success. 73 | * @retval -1 If error occured. 74 | * */ 75 | int rs_fini(reg_set_t *rs); 76 | 77 | #ifdef __cplusplus 78 | } 79 | #endif 80 | 81 | #endif 82 | 83 | /* vi:ft=c ts=4 sw=4 fdm=marker et 84 | * */ 85 | 86 | -------------------------------------------------------------------------------- /include/sparse_bitmap.h: -------------------------------------------------------------------------------- 1 | #ifndef SPARSE_BITMAP_H__ 2 | #define SPARSE_BITMAP_H__ 3 | 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | #define HF(opt) ((opt) & 0x0f) 11 | #define IS_SPARSE_BMP(bmp) ((bmp)[0] & 0x80) 12 | #define MAKE_SPARSE_ID(k) (0x80 | (k)) 13 | #define K_FROM_ID(id) (0x7f & (id)) 14 | 15 | /** 16 | * Additional options 17 | * */ 18 | enum { 19 | CCARD_OPT_SPARSE = 0x10 20 | }; 21 | 22 | /** 23 | * Convert little-endian byte-seq starting at specified position to 24 | * integer. 25 | * 26 | * @param[in] bmp Pointer to the original byte array 27 | * @param[in] off Byte-sequence starting offset in bytes 28 | * @param[in] len Byte-sequence length in bytes 29 | * @retval Converted integer. 30 | * */ 31 | int sparse_bytes_to_int(const uint8_t *bmp, int off, int len); 32 | 33 | /** 34 | * Convert the given integer to little-endian byte-seq starting at 35 | * specified position. 
36 | * 37 | * @param[in] bmp Pointer to the resulting byte array 38 | * @param[in] off Resulting byte-sequence starting offset in bytes 39 | * @param[in] len Byte-sequence length in bytes 40 | * @param[in] val Integer to be converted 41 | * */ 42 | void sparse_int_to_bytes(uint8_t *bmp, int off, int len, int val); 43 | 44 | #ifdef __cplusplus 45 | } 46 | #endif 47 | 48 | #endif 49 | /* vi:ft=c ts=4 sw=4 fdm=marker et 50 | * */ 51 | 52 | -------------------------------------------------------------------------------- /src/SConscript: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | ver = '0.1' 4 | name = 'ccard-lib' 5 | libdir = '$PREFIX/usr/lib/' 6 | 7 | env = Environment( 8 | PREFIX = GetOption('prefix'), 9 | CPPPATH = ["#/include"], 10 | CCFLAGS = ["-Wall", "-Wextra", "-Werror", "-g3", "-std=c99"] 11 | ) 12 | # to comply with travis's compiler setting 13 | env["CC"] = os.getenv("CC") or env["CC"] 14 | env["CXX"] = os.getenv("CXX") or env["CXX"] 15 | env["ENV"].update(x for x in os.environ.items() if x[0].startswith("CCC_")) 16 | 17 | srcs = Glob("*.c") 18 | libname = name + '.' 
+ ver 19 | slname = env.subst('$LIBPREFIX') + name + env.subst('$LIBSUFFIX') 20 | dlname = env.subst('$SHLIBPREFIX') + name + env.subst('$SHLIBSUFFIX') 21 | 22 | sl = env.StaticLibrary(libname, srcs) 23 | dl = env.SharedLibrary(libname, srcs) 24 | 25 | sl_new = env.Command("../" + str(sl[0]), sl, Move("$TARGET", "$SOURCE")) 26 | dl_new = env.Command("../" + str(dl[0]), dl, Move("$TARGET", "$SOURCE")) 27 | 28 | env.Install(libdir, [sl_new, dl_new]) 29 | 30 | # make version-less link to static and shared libraries 31 | cmd_ln_static = 'ln -s '+str(sl[0])+' '+slname 32 | cmd_ln_dso = 'ln -s '+str(dl[0])+' '+dlname 33 | 34 | env.Command("../"+slname, sl_new, cmd_ln_static, chdir=1) 35 | env.Command("../"+dlname, dl_new, cmd_ln_dso, chdir=1) 36 | 37 | env.Command(libdir+slname, libdir+str(sl[0]), cmd_ln_static, chdir=1) 38 | env.Command(libdir+dlname, libdir+str(dl[0]), cmd_ln_dso, chdir=1) 39 | 40 | # make 'install' the alias of library directory target 41 | env.Alias('install', libdir) 42 | 43 | # vi:ft=python ts=4 sw=4 et fdm=marker 44 | -------------------------------------------------------------------------------- /src/adaptive_counting.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "murmurhash.h" 10 | #include "lookup3hash.h" 11 | #include "adaptive_counting.h" 12 | 13 | struct adp_cnt_ctx_s { 14 | int err; 15 | uint8_t k; /* log2(total buckets) */ 16 | uint8_t hf; /* hash function id */ 17 | uint8_t sidx_len; /* bucket index length */ 18 | double Ca; /* bias correction coefficient */ 19 | uint32_t m; /* total buckets */ 20 | uint32_t Rsum; /* sum of non-empty buckets */ 21 | uint32_t b_e; /* number of empty buckets */ 22 | uint32_t bmp_len; /* actual bitmap length */ 23 | uint8_t *M; /* pointer to buckets array */ 24 | }; 25 | 26 | /** 27 | * Gamma function computed using SciLab 28 | * 29 | * 
/**
 * Bias-correction coefficients a_m of the LogLog estimator, indexed by
 * k = log2(m), where m is the total number of buckets.
 *
 * The values were computed with SciLab as
 *
 * ((gamma(-(m.^(-1))).* ( (1-2.^(m.^(-1)))./log(2) )).^(-m)).*m
 *
 * which is a_m on page 5 of the paper "LogLog Counting of Large
 * Cardinalities".
 *
 * NOTE(review): from about index 22 on, the ratio between consecutive
 * entries drifts away from 2, and the entries decrease after index 28 --
 * presumably a numerical breakdown of the formula for very large m; confirm
 * before relying on those entries.
 * */
static const double alpha[] = {
    0,
    0.44567926005415,
    1.2480639342271,
    2.8391255240079,
    6.0165231584811,
    12.369319965552,
    25.073991603109,
    50.482891762521,
    101.30047482549,
    202.93553337953,
    406.20559693552,
    812.74569741657,
    1625.8258887309,
    3251.9862249084,
    6504.3071471860,
    13008.949929672,
    26018.222470181,
    52036.684135280,
    104073.41696276,
    208139.24771523,
    416265.57100022,
    832478.53851627,
    1669443.2499579,
    3356902.8702907,
    6863377.8429508,
    11978069.823687,
    31333767.455026,
    52114301.457757,
    72080129.928986,
    68945006.880409,
    31538957.552704,
    3299942.4347441
};

/**
 * Empty-bucket ratio at which the estimator switches algorithms: at or above
 * it linear counting is used, below it LogLog counting.
 */
static const double B_s = 0.051;

/**
 * Count the trailing (least-significant) zero bits of a 64-bit value.
 *
 * @param[in] i Value to inspect.
 * @retval 64 If i is zero.
 * @retval 0..63 Position of the lowest set bit otherwise.
 * */
static uint8_t
num_of_trail_zeros(uint64_t i)
{
    uint8_t zeros = 0;
    unsigned width;

    if (i == 0) {
        return 64;
    }

    /* halve the inspected window each round: 32, 16, 8, 4, 2, 1 bits */
    for (width = 32; width != 0; width >>= 1) {
        const uint64_t low_mask = (UINT64_C(1) << width) - 1;

        if ((i & low_mask) == 0) {
            /* the whole low window is empty: count it and drop it */
            zeros = (uint8_t)(zeros + width);
            i >>= width;
        }
    }

    return zeros;
}
if(bkt_no == idx) { 105 | return off; 106 | } else if(bkt_no < idx) { 107 | end = mid - 1; 108 | } else { 109 | begin = mid + 1; 110 | } 111 | } 112 | 113 | return -1; 114 | } 115 | 116 | static int 117 | sparse_should_use_normal_bitmap(adp_cnt_ctx_t *ctx, uint32_t used_bkts) 118 | { 119 | return (used_bkts + 1) * (ctx->sidx_len + 1) >= ctx->m; 120 | } 121 | 122 | static void 123 | sparse_insert_bucket(adp_cnt_ctx_t *ctx, int bkt_no, uint8_t bkt_val) 124 | { 125 | int step = ctx->sidx_len + 1; 126 | int idx; 127 | uint32_t off; 128 | 129 | /* extend sparse array for one more bucket */ 130 | ctx->M = realloc(ctx->M, ctx->bmp_len + step); 131 | 132 | /* search insertion position */ 133 | for(off = 1; off < ctx->bmp_len; off += step) { 134 | idx = sparse_bytes_to_int(ctx->M, off + 1, ctx->sidx_len); 135 | if(idx > bkt_no) { 136 | break; 137 | } 138 | } 139 | if(off < ctx->bmp_len) { 140 | /* insertion position is between original buckets, make room for 141 | * inserted bucket */ 142 | memmove(ctx->M + off + step, ctx->M + off, ctx->bmp_len - off); 143 | } 144 | 145 | /* insert new bucket */ 146 | ctx->M[off] = bkt_val; 147 | sparse_int_to_bytes(ctx->M, off + 1, ctx->sidx_len, bkt_no); 148 | 149 | ctx->bmp_len += step; 150 | } 151 | 152 | static void 153 | sparse_to_normal_bitmap(adp_cnt_ctx_t *ctx) 154 | { 155 | int idx; 156 | uint32_t i, step = ctx->sidx_len + 1; 157 | uint8_t *bmp = calloc(sizeof(uint8_t), ctx->m); 158 | 159 | /* convert sparse format to normal format */ 160 | for(i = 1; i < ctx->bmp_len; i += step) { 161 | idx = sparse_bytes_to_int(ctx->M, i + 1, ctx->sidx_len); 162 | bmp[idx] = ctx->M[i]; 163 | } 164 | 165 | /* replace sparse bucket array to normal one */ 166 | free(ctx->M); 167 | ctx->bmp_len = ctx->m; 168 | ctx->M = bmp; 169 | } 170 | 171 | /** 172 | * Verify context and given bitmap has identical total bucket number 173 | * 174 | * @param[in] ctx Context to be merged to. Must be non-NULL. 
175 | * @param[in] is_raw 1 if given bitmap is in raw format, 0 if in external 176 | * format. 177 | * @param[in] buf The bitmap to be merged. Must be non-NULL. 178 | * @param[in] len Length of the bitmap. 179 | * @retval -1 Verification failed, the given bitmap can't be merged to ctx. 180 | * @retval 0 Verification success and there exists at least 1 normal bitmap. 181 | * @retval 1 Verification success and there are only sparse bitmaps. 182 | * */ 183 | static int 184 | unified_bitmap_verify(adp_cnt_ctx_t *ctx, int is_raw, 185 | const void *buf, uint32_t len) 186 | { 187 | int rc; 188 | const uint8_t *in = buf; 189 | uint32_t m; 190 | 191 | assert(ctx && buf); 192 | 193 | if(!is_raw) { 194 | /* bitmap is not in raw format, check header first */ 195 | if(in[0] != CCARD_ALGO_ADAPTIVE 196 | || in[1] != ctx->hf 197 | || in[2] != ctx->k 198 | || len <= 3) { 199 | return -1; 200 | } 201 | 202 | /* strip out header bytes */ 203 | in += 3; 204 | len -= 3; 205 | } 206 | 207 | if(IS_SPARSE_BMP(in)) { 208 | m = 1 << K_FROM_ID(in[0]); 209 | rc = 1; 210 | } else { 211 | m = len; 212 | rc = 0; 213 | } 214 | if(ctx->m != m) { 215 | return -1; 216 | } 217 | 218 | return rc; 219 | } 220 | 221 | /** 222 | * Check if there are bitmaps in normal format among all bitmaps to be merged. 223 | * 224 | * @param[in] ctx Context to be merged to. Must be non-NULL. 225 | * @param[in] buf_cnt Number of raw bitmaps to be merged. 226 | * @param[in] pbuf Pointer to the array of raw bitmap pointers. 227 | * @param[in] plen Pointer to the array of raw bitmap lengths. 228 | * @retval 0 Verification success and there exists at least 1 normal bitmap. 229 | * @retval 1 Verification success and there are only sparse bitmaps. 
230 | * */ 231 | static int 232 | is_there_normal_raw_bitmap(adp_cnt_ctx_t *ctx, int buf_cnt, 233 | const uint8_t **pbuf, uint32_t *plen) 234 | { 235 | int rc; 236 | int i; 237 | int sparse_only = 1; 238 | 239 | assert(ctx); 240 | 241 | for(i = 0; i < buf_cnt; i++) { 242 | rc = unified_bitmap_verify(ctx, 1, pbuf[i], plen[i]); 243 | if(rc == 0) { 244 | sparse_only = 0; 245 | } 246 | } 247 | 248 | if(sparse_only) { 249 | return 1; 250 | } 251 | return 0; 252 | } 253 | 254 | /** 255 | * Count distinct buckets among all given sparse bitmaps 256 | * 257 | * @note It is done by n-way merge all bucket arrays 258 | * */ 259 | static uint32_t 260 | sparse_count_distinct_buckets(adp_cnt_ctx_t *ctx, int buf_cnt, 261 | const uint8_t **pbuf, uint32_t *plen) 262 | { 263 | int i; 264 | int total_bkts; 265 | int idx; 266 | int last_idx; 267 | int min_idx; 268 | int min_est_no; 269 | uint32_t *offs = (uint32_t *)alloca(sizeof(uint32_t) * buf_cnt); 270 | 271 | /* ignore initial ID byte in all sparse bitmaps */ 272 | for(i = 0; i < buf_cnt; i++) { 273 | offs[i] = 1; 274 | } 275 | 276 | /* n-way merge to count distinct buckets */ 277 | total_bkts = 0; 278 | min_est_no = 0; 279 | last_idx = -1; 280 | while(min_est_no != -1) { 281 | min_idx = INT_MAX; 282 | min_est_no = -1; 283 | 284 | /* find minimum bucket index among heads of sparse bucket arrays */ 285 | for(i = 0; i < buf_cnt; i++) { 286 | if(offs[i] < plen[i]) { 287 | idx = sparse_bytes_to_int(pbuf[i], offs[i] + 1, ctx->sidx_len); 288 | if(idx < min_idx) { 289 | min_idx = idx; 290 | min_est_no = i; 291 | } 292 | } 293 | } 294 | 295 | if(min_est_no != -1) { 296 | /* advance the head of merged sparse bucket array */ 297 | offs[min_est_no] += ctx->sidx_len + 1; 298 | /* increase count of distinct bucket when new index occured*/ 299 | if(last_idx != min_idx) { 300 | total_bkts++; 301 | last_idx = min_idx; 302 | } 303 | } 304 | } 305 | 306 | return total_bkts; 307 | } 308 | 309 | /** 310 | * Merge all given sparse/normal bitmaps to a 
normal bitmap 311 | * 312 | * @note dbm should be zero'd out before call this routine, otherwise the 313 | * result would be unexpected 314 | * */ 315 | static void 316 | merge_to_normal_bmp(uint8_t *dbm, adp_cnt_ctx_t *ctx, int buf_cnt, 317 | const uint8_t **pbuf, uint32_t *plen) 318 | { 319 | int step = ctx->sidx_len + 1; 320 | int i; 321 | uint32_t j; 322 | 323 | for(i = 0; i < buf_cnt; i++) { 324 | if(IS_SPARSE_BMP(pbuf[i])) { 325 | /* merge sparse bitmap */ 326 | for(j = 1; j < plen[i]; j += step) { 327 | uint8_t r = pbuf[i][j]; 328 | int idx = sparse_bytes_to_int(pbuf[i], j + 1, ctx->sidx_len); 329 | if(dbm[idx] < r) { 330 | dbm[idx] = r; 331 | } 332 | } 333 | } else { 334 | /* merge normal bitmap */ 335 | for(j = 0; j < plen[i]; j++) { 336 | if(dbm[j] < pbuf[i][j]) { 337 | dbm[j] = pbuf[i][j]; 338 | } 339 | } 340 | } 341 | } 342 | } 343 | 344 | /** 345 | * Merge all given sparse bitmaps to a sparse bitmap 346 | * 347 | * @note dbm should be zero'd out before call this routine, otherwise the 348 | * result would be unexpected 349 | * */ 350 | static void 351 | merge_to_sparse_bmp(uint8_t *dbm, adp_cnt_ctx_t *ctx, 352 | int buf_cnt, const uint8_t **pbuf, uint32_t *plen) 353 | { 354 | int i; 355 | int idx; 356 | int min_idx; 357 | int min_est_no; 358 | uint8_t min_r; 359 | int last_idx; 360 | int step = ctx->sidx_len + 1; 361 | int off; 362 | uint32_t *offs = (uint32_t *)alloca(sizeof(uint32_t) * buf_cnt); 363 | 364 | /* generate sparse bitmap ID */ 365 | off = 1; 366 | dbm[0] = MAKE_SPARSE_ID(ctx->k); 367 | 368 | /* ignore initial ID byte in all sparse bitmaps */ 369 | for(i = 0; i < buf_cnt; i++) { 370 | offs[i] = 1; 371 | } 372 | 373 | /* n-way merge sparse bitmaps */ 374 | min_est_no = 0; 375 | min_r = 0; 376 | last_idx = -1; 377 | while(min_est_no != -1) { 378 | min_idx = INT_MAX; 379 | min_est_no = -1; 380 | 381 | /* find minimum bucket index among heads of sparse bucket arrays */ 382 | for(i = 0; i < buf_cnt; i++) { 383 | if(offs[i] < plen[i]) { 384 | 
idx = sparse_bytes_to_int(pbuf[i], offs[i] + 1, ctx->sidx_len); 385 | if(idx < min_idx) { 386 | min_idx = idx; 387 | min_est_no = i; 388 | min_r = pbuf[i][offs[i]]; 389 | } 390 | } 391 | } 392 | 393 | if(min_est_no != -1) { 394 | /* insert bucket index */ 395 | if(last_idx != min_idx) { 396 | if(last_idx != -1) { 397 | off += step; 398 | } 399 | sparse_int_to_bytes(dbm, off + 1, ctx->sidx_len, min_idx); 400 | last_idx = min_idx; 401 | } 402 | /* merge bucket value */ 403 | if(dbm[off] < min_r) { 404 | dbm[off] = min_r; 405 | } 406 | /* advance the head of merged sparse bucket array */ 407 | offs[min_est_no] += step; 408 | } 409 | } 410 | } 411 | 412 | /** 413 | * Update estimator state in context according to bucket array data 414 | * 415 | * @note Context must be initialized with correct bucket array data first 416 | * */ 417 | static void 418 | update_estimator_state(adp_cnt_ctx_t *ctx, int init) 419 | { 420 | uint32_t i; 421 | 422 | ctx->sidx_len = (ctx->k + 7) / 8; /* =(int)ceil(k/8.0) */ 423 | ctx->Ca = alpha[ctx->k]; 424 | ctx->Rsum = 0; 425 | ctx->b_e = ctx->m; 426 | 427 | if(!init) { 428 | if(IS_SPARSE_BMP(ctx->M)) { 429 | /* skip ID byte and accumulate all sparse buckets */ 430 | for(i = 1; i < ctx->bmp_len; i += ctx->sidx_len + 1) { 431 | ctx->Rsum += ctx->M[i]; 432 | ctx->b_e--; 433 | } 434 | } else { 435 | /* traverse all buckets and accumulate non-empty ones */ 436 | for(i = 0; i < ctx->bmp_len; i++) { 437 | if (ctx->M[i] > 0) { 438 | ctx->Rsum += ctx->M[i]; 439 | ctx->b_e--; 440 | } 441 | } 442 | } 443 | } 444 | } 445 | 446 | /** 447 | * Merge all given raw bitmaps and replace bitmap in current context 448 | * 449 | * @note The given raw bitmap list should contains current context bitmap if it 450 | * should also be merged 451 | * */ 452 | static int 453 | aux_merge_raw_bytes(adp_cnt_ctx_t *ctx, int buf_cnt, 454 | const uint8_t **pbuf, uint32_t *plen) 455 | { 456 | int rc; 457 | int gen_normal = 0; 458 | uint8_t *dbm = NULL; 459 | uint32_t dlen; 460 | 
uint32_t bkts = 0; 461 | 462 | rc = is_there_normal_raw_bitmap(ctx, buf_cnt, pbuf, plen); 463 | if(rc == 0) { 464 | /* there exists at least 1 normal bitmap, merge to normal format */ 465 | dlen = ctx->m; 466 | gen_normal = 1; 467 | } else { 468 | /* there are only sparse bitmaps */ 469 | bkts = sparse_count_distinct_buckets(ctx, buf_cnt, pbuf, plen); 470 | if(!sparse_should_use_normal_bitmap(ctx, bkts)) { 471 | /* merge to sparse format has less memory overhead */ 472 | dlen = bkts * (ctx->sidx_len + 1) + 1; /* buckets + ID */ 473 | gen_normal = 0; 474 | } else { 475 | /* merge to normal format */ 476 | dlen = ctx->m; 477 | gen_normal = 1; 478 | } 479 | } 480 | 481 | dbm = (uint8_t *)calloc(sizeof(uint8_t), dlen); 482 | if(gen_normal) { 483 | merge_to_normal_bmp(dbm, ctx, buf_cnt, pbuf, plen); 484 | } else { 485 | merge_to_sparse_bmp(dbm, ctx, buf_cnt, pbuf, plen); 486 | } 487 | 488 | /* replace context bitmap with merged one and update estimator state */ 489 | free(ctx->M); 490 | ctx->M = dbm; 491 | ctx->bmp_len = dlen; 492 | update_estimator_state(ctx, 0); 493 | 494 | ctx->err = CCARD_OK; 495 | return 0; 496 | } 497 | 498 | adp_cnt_ctx_t * 499 | adp_cnt_raw_init(const void *obuf, uint32_t len_or_k, uint8_t opt) 500 | { 501 | adp_cnt_ctx_t *ctx; 502 | uint8_t *buf = (uint8_t *)obuf; 503 | 504 | if (len_or_k == 0) { 505 | /* invalid buffer length or k */ 506 | return NULL; 507 | } 508 | 509 | if (buf) { 510 | /* initial bitmap was given */ 511 | uint8_t k; 512 | uint32_t m; 513 | 514 | if(IS_SPARSE_BMP(buf)) { 515 | /* 516 | * initial bitmap is sparse one, calculate real bitmap size. 517 | * 518 | * the sparse bitmap format is: 519 | * 520 | * +-------+-------+---------+-------+---------+-----+ 521 | * | ID[1] | B0[1] | IDX0[d] | B1[1] | IDX1[d] | ... 
| 522 | * +-------+-------+---------+-------+---------+-----+ 523 | * 524 | * where ID is 0x80|k, B* is non-empty bucket values (always >0) 525 | * and IDX* is corresponding bucket index (var-len little-endian 526 | * bytes, equal to ceil(k/8)). 527 | * 528 | * if there're no non-empty buckets, sparse bitmap has only the ID 529 | * byte. 530 | */ 531 | k = K_FROM_ID(buf[0]); 532 | m = (uint32_t)(1 << k); 533 | } else { 534 | /* initial bitmap is normal one */ 535 | k = num_of_trail_zeros(len_or_k); 536 | m = len_or_k; 537 | if (m != (uint32_t)(1 << k)) { 538 | /* invalid buffer size, its length must be a power of 2 */ 539 | return NULL; 540 | } 541 | } 542 | 543 | if (k >= sizeof(alpha) / sizeof(alpha[0])) { 544 | /* exceeded maximum k */ 545 | return NULL; 546 | } 547 | 548 | ctx = (adp_cnt_ctx_t *)malloc(sizeof(adp_cnt_ctx_t)); 549 | ctx->err = CCARD_OK; 550 | ctx->m = m; 551 | ctx->k = k; 552 | ctx->bmp_len = len_or_k; 553 | ctx->M = malloc(ctx->bmp_len); 554 | memcpy(ctx->M, buf, ctx->bmp_len); 555 | ctx->hf = HF(opt); 556 | 557 | update_estimator_state(ctx, 0); 558 | } else { 559 | /* only k was given */ 560 | uint8_t k = len_or_k; 561 | if (k >= sizeof(alpha) / sizeof(alpha[0])) { 562 | /* exceeded maximum k */ 563 | return NULL; 564 | } 565 | 566 | if(opt & CCARD_OPT_SPARSE) { 567 | /* create sparse bitmap with only ID byte*/ 568 | ctx = (adp_cnt_ctx_t *)malloc(sizeof(adp_cnt_ctx_t)); 569 | ctx->bmp_len = 1; 570 | ctx->M = malloc(ctx->bmp_len); 571 | ctx->M[0] = MAKE_SPARSE_ID(k); 572 | } else { 573 | /* create normal bitmap */ 574 | ctx = (adp_cnt_ctx_t *)malloc(sizeof(adp_cnt_ctx_t)); 575 | ctx->bmp_len = 1 << k; 576 | ctx->M = malloc(ctx->bmp_len); 577 | memset(ctx->M, 0, ctx->bmp_len); 578 | } 579 | 580 | ctx->err = CCARD_OK; 581 | ctx->m = 1 << k; 582 | ctx->k = k; 583 | ctx->hf = HF(opt); 584 | 585 | update_estimator_state(ctx, 1); 586 | } 587 | 588 | return ctx; 589 | } 590 | 591 | adp_cnt_ctx_t * 592 | adp_cnt_init(const void *obuf, uint32_t 
len_or_k, uint8_t opt) 593 | { 594 | uint8_t *buf = (uint8_t *)obuf; 595 | 596 | if (buf) { 597 | if(len_or_k <= 3) { 598 | return NULL; 599 | } 600 | 601 | uint32_t data_segment_size = len_or_k - 3; 602 | uint8_t k = 0; 603 | 604 | if(IS_SPARSE_BMP(&buf[3])) { 605 | /* sparse bitmap, get k from the 1st byte of bitmap */ 606 | k = K_FROM_ID(buf[3]); 607 | } else { 608 | /* dense bitmap, calculate k from total length of bitmap */ 609 | k = num_of_trail_zeros(data_segment_size); 610 | } 611 | 612 | if (buf[0] != CCARD_ALGO_ADAPTIVE || 613 | buf[1] != HF(opt) || 614 | buf[2] != k) { 615 | 616 | /* counting algorithm, hash function or length not match */ 617 | return NULL; 618 | } 619 | 620 | return adp_cnt_raw_init(buf + 3, data_segment_size, opt); 621 | } 622 | 623 | return adp_cnt_raw_init(NULL, len_or_k, opt); 624 | } 625 | 626 | int64_t 627 | adp_cnt_card_loglog(adp_cnt_ctx_t *ctx) 628 | { 629 | double Ravg; 630 | 631 | if (!ctx) { 632 | return -1; 633 | } 634 | 635 | Ravg = ctx->Rsum / (double)ctx->m; 636 | ctx->err = CCARD_OK; 637 | return (int64_t)(ctx->Ca * pow(2, Ravg)); 638 | } 639 | 640 | int64_t 641 | adp_cnt_card(adp_cnt_ctx_t *ctx) 642 | { 643 | double B = ctx->b_e / (double)ctx->m; 644 | 645 | if (!ctx) { 646 | return -1; 647 | } 648 | 649 | if (B >= B_s) { 650 | ctx->err = CCARD_OK; 651 | return (int64_t)round((-(double)ctx->m) * log(B)); 652 | } 653 | 654 | return adp_cnt_card_loglog(ctx); 655 | } 656 | 657 | int 658 | adp_cnt_offer(adp_cnt_ctx_t *ctx, const void *buf, uint32_t len) 659 | { 660 | int modified = 0; 661 | uint64_t x, j; 662 | uint8_t r, hl; 663 | 664 | if (!ctx) { 665 | return -1; 666 | } 667 | 668 | switch (ctx->hf) { 669 | case CCARD_HASH_MURMUR: 670 | x = (uint64_t)murmurhash((void *)buf, len, -1); 671 | hl = 32; 672 | break; 673 | case CCARD_HASH_LOOKUP3: 674 | x = lookup3ycs64_2((const char *)buf); 675 | hl = 64; 676 | break; 677 | default: 678 | /* default to use lookup3 hash function */ 679 | x = lookup3ycs64_2((const char 
*)buf); 680 | hl = 64; 681 | } 682 | 683 | j = x >> (hl - ctx->k); 684 | r = (uint8_t)(num_of_trail_zeros(x << (ctx->k + 64 - hl)) - (ctx->k + 64 - 685 | hl) + 1); 686 | 687 | if(IS_SPARSE_BMP(ctx->M)) { 688 | /* update sparse bucket counter */ 689 | int off = sparse_bisect_search(ctx, j); 690 | 691 | if(off != -1) { 692 | /* the bucket to be updated already exists, no need to decrease 693 | * empty bucket counter */ 694 | if(ctx->M[off] < r) { 695 | ctx->Rsum += r - ctx->M[off]; 696 | ctx->M[off] = r; 697 | modified = 1; 698 | } 699 | return modified; 700 | } 701 | 702 | if(!sparse_should_use_normal_bitmap(ctx, ctx->m - ctx->b_e)) { 703 | /* still use sparse format to insert new bucket */ 704 | sparse_insert_bucket(ctx, j, r); 705 | ctx->Rsum += r; 706 | ctx->b_e--; 707 | return 1; 708 | } 709 | 710 | /* convert sparse buckets to normal format, fallback to CONT */ 711 | sparse_to_normal_bitmap(ctx); 712 | } 713 | 714 | /* CONT: update normal bucket counter */ 715 | if (ctx->M[j] < r) { 716 | ctx->Rsum += r - ctx->M[j]; 717 | if (ctx->M[j] == 0) { 718 | ctx->b_e--; 719 | } 720 | ctx->M[j] = r; 721 | 722 | modified = 1; 723 | } 724 | 725 | ctx->err = CCARD_OK; 726 | return modified; 727 | } 728 | 729 | int 730 | adp_cnt_get_raw_bytes(adp_cnt_ctx_t *ctx, void *buf, uint32_t *len) 731 | { 732 | uint8_t *out = (uint8_t *)buf; 733 | 734 | if (!ctx || !len || (out && *len < ctx->bmp_len)) { 735 | return -1; 736 | } 737 | 738 | if(out) { 739 | memcpy(out, ctx->M, ctx->bmp_len); 740 | } 741 | *len = ctx->bmp_len; 742 | 743 | return 0; 744 | } 745 | 746 | int 747 | adp_cnt_get_bytes(adp_cnt_ctx_t *ctx, void *buf, uint32_t *len) 748 | { 749 | /* 750 | +--------------+---------+------------------------------+-----------+ 751 | | algorithm[1] | hash[1] | bitmap length(base-2 log)[1] | bitmap[n] | 752 | +--------------+---------+------------------------------+-----------+ 753 | */ 754 | uint8_t algo = CCARD_ALGO_ADAPTIVE; 755 | uint8_t *out = (uint8_t *)buf; 756 | 757 | if 
(!ctx || !len || (out && *len < ctx->bmp_len + 3)) { 758 | return -1; 759 | } 760 | 761 | if (out) { 762 | out[0] = algo; 763 | out[1] = ctx->hf; 764 | out[2] = ctx->k; 765 | memcpy(out + 3, ctx->M, ctx->bmp_len); 766 | } 767 | *len = ctx->bmp_len + 3; 768 | 769 | return 0; 770 | } 771 | 772 | int 773 | adp_cnt_merge(adp_cnt_ctx_t *ctx, adp_cnt_ctx_t *tbm, ...) 774 | { 775 | int rc; 776 | va_list vl; 777 | adp_cnt_ctx_t *bm; 778 | 779 | if (!ctx) { 780 | return -1; 781 | } 782 | 783 | if (tbm) { 784 | int invalid = 0; 785 | int buf_cnt; 786 | const uint8_t **pbuf; 787 | uint32_t *plen; 788 | 789 | /* count number of estimators and validate them */ 790 | buf_cnt = 2; /* current context and the 1st estimator in args */ 791 | rc = unified_bitmap_verify(ctx, 1, tbm->M, tbm->bmp_len); 792 | if(rc == -1 || ctx->hf != tbm->hf) { 793 | invalid = 1; 794 | } else { 795 | va_start(vl, tbm); 796 | while ((bm = va_arg(vl, adp_cnt_ctx_t *)) != NULL) { 797 | rc = unified_bitmap_verify(ctx, 1, bm->M, bm->bmp_len); 798 | if(rc == -1 || ctx->hf != bm->hf) { 799 | invalid = 1; 800 | break; 801 | } 802 | buf_cnt++; 803 | } 804 | va_end(vl); 805 | } 806 | 807 | if(invalid) { 808 | ctx->err = CCARD_ERR_MERGE_FAILED; 809 | return -1; 810 | } 811 | 812 | pbuf = (const uint8_t **)alloca(sizeof(const uint8_t *) * buf_cnt); 813 | plen = (uint32_t *)alloca(sizeof(uint32_t) * buf_cnt); 814 | 815 | /* initialize buffer array */ 816 | buf_cnt = 2; 817 | pbuf[0] = ctx->M; 818 | plen[0] = ctx->bmp_len; 819 | pbuf[1] = tbm->M; 820 | plen[1] = tbm->bmp_len; 821 | 822 | va_start(vl, tbm); 823 | while ((bm = va_arg(vl, adp_cnt_ctx_t *)) != NULL) { 824 | pbuf[buf_cnt] = bm->M; 825 | plen[buf_cnt] = bm->bmp_len; 826 | buf_cnt++; 827 | } 828 | va_end(vl); 829 | 830 | rc = aux_merge_raw_bytes(ctx, buf_cnt, pbuf, plen); 831 | } else { 832 | ctx->err = CCARD_OK; 833 | rc = 0; 834 | } 835 | 836 | return rc; 837 | } 838 | 839 | int 840 | adp_cnt_merge_raw_bytes(adp_cnt_ctx_t *ctx, const void *buf, uint32_t 
len, ...) 841 | { 842 | int rc; 843 | va_list vl; 844 | 845 | if (!ctx) { 846 | return -1; 847 | } 848 | 849 | if (buf) { 850 | int invalid = 0; 851 | int buf_cnt; 852 | const void *in_buf; 853 | uint32_t in_len; 854 | const uint8_t **pbuf; 855 | uint32_t *plen; 856 | 857 | /* count number of buffers and validate them */ 858 | buf_cnt = 2; /* current context and the 1st buffer in args */ 859 | rc = unified_bitmap_verify(ctx, 1, buf, len); 860 | if(rc == -1) { 861 | invalid = 1; 862 | } else { 863 | va_start(vl, len); 864 | while((in_buf = va_arg(vl, const void *)) != NULL) { 865 | in_len = va_arg(vl, uint32_t); 866 | rc = unified_bitmap_verify(ctx, 1, in_buf, in_len); 867 | if(rc == -1) { 868 | invalid = 1; 869 | break; 870 | } 871 | buf_cnt++; 872 | } 873 | va_end(vl); 874 | } 875 | 876 | if(invalid) { 877 | ctx->err = CCARD_ERR_MERGE_FAILED; 878 | return -1; 879 | } 880 | 881 | pbuf = (const uint8_t **)alloca(sizeof(const uint8_t *) * buf_cnt); 882 | plen = (uint32_t *)alloca(sizeof(uint32_t) * buf_cnt); 883 | 884 | /* initialize buffer array */ 885 | buf_cnt = 2; 886 | pbuf[0] = ctx->M; 887 | plen[0] = ctx->bmp_len; 888 | pbuf[1] = buf; 889 | plen[1] = len; 890 | 891 | va_start(vl, len); 892 | while((in_buf = va_arg(vl, const void *)) != NULL) { 893 | in_len = va_arg(vl, uint32_t); 894 | pbuf[buf_cnt] = in_buf; 895 | plen[buf_cnt] = in_len; 896 | buf_cnt++; 897 | } 898 | va_end(vl); 899 | 900 | rc = aux_merge_raw_bytes(ctx, buf_cnt, pbuf, plen); 901 | } else { 902 | ctx->err = CCARD_OK; 903 | rc = 0; 904 | } 905 | 906 | return rc; 907 | } 908 | 909 | int 910 | adp_cnt_merge_bytes(adp_cnt_ctx_t *ctx, const void *buf, uint32_t len, ...) 
911 | { 912 | int rc; 913 | va_list vl; 914 | 915 | if (!ctx) { 916 | return -1; 917 | } 918 | 919 | if (buf) { 920 | int invalid = 0; 921 | int buf_cnt; 922 | const void *in_buf; 923 | uint32_t in_len; 924 | const uint8_t **pbuf; 925 | uint32_t *plen; 926 | 927 | 928 | /* count number of buffers and validate them */ 929 | buf_cnt = 2; /* current context and the 1st buffer in args */ 930 | rc = unified_bitmap_verify(ctx, 0, buf, len); 931 | if(rc == -1) { 932 | invalid = 1; 933 | } else { 934 | va_start(vl, len); 935 | while((in_buf = va_arg(vl, const void *)) != NULL) { 936 | in_len = va_arg(vl, uint32_t); 937 | rc = unified_bitmap_verify(ctx, 0, in_buf, in_len); 938 | if(rc == -1) { 939 | invalid = 1; 940 | break; 941 | } 942 | buf_cnt++; 943 | } 944 | va_end(vl); 945 | } 946 | 947 | if(invalid) { 948 | ctx->err = CCARD_ERR_MERGE_FAILED; 949 | return -1; 950 | } 951 | 952 | pbuf = (const uint8_t **)alloca(sizeof(const uint8_t *) * buf_cnt); 953 | plen = (uint32_t *)alloca(sizeof(uint32_t) * buf_cnt); 954 | 955 | /* initialize buffer array (strip headers) */ 956 | buf_cnt = 2; 957 | pbuf[0] = ctx->M; 958 | plen[0] = ctx->bmp_len; 959 | pbuf[1] = (const uint8_t *)buf + 3; 960 | plen[1] = len - 3; 961 | 962 | va_start(vl, len); 963 | while((in_buf = va_arg(vl, const void *)) != NULL) { 964 | in_len = va_arg(vl, uint32_t); 965 | pbuf[buf_cnt] = (const uint8_t *)in_buf + 3; 966 | plen[buf_cnt] = in_len - 3; 967 | buf_cnt++; 968 | } 969 | va_end(vl); 970 | 971 | rc = aux_merge_raw_bytes(ctx, buf_cnt, pbuf, plen); 972 | } else { 973 | ctx->err = CCARD_OK; 974 | rc = 0; 975 | } 976 | 977 | return rc; 978 | } 979 | 980 | int 981 | adp_cnt_reset(adp_cnt_ctx_t *ctx) 982 | { 983 | if (!ctx) { 984 | return -1; 985 | } 986 | 987 | ctx->err = CCARD_OK; 988 | ctx->Rsum = 0; 989 | ctx->b_e = ctx->m; 990 | if(IS_SPARSE_BMP(ctx->M)) { 991 | ctx->M = realloc(ctx->M, 1); 992 | ctx->M[0] = MAKE_SPARSE_ID(ctx->k); 993 | } else { 994 | memset(ctx->M, 0, ctx->m); 995 | } 996 | 997 | 
return 0; 998 | } 999 | 1000 | int 1001 | adp_cnt_fini(adp_cnt_ctx_t *ctx) 1002 | { 1003 | if (ctx) { 1004 | free(ctx->M); 1005 | free(ctx); 1006 | return 0; 1007 | } 1008 | 1009 | return -1; 1010 | } 1011 | 1012 | int 1013 | adp_cnt_errnum(adp_cnt_ctx_t *ctx) 1014 | { 1015 | if (ctx) { 1016 | return ctx->err; 1017 | } 1018 | 1019 | return CCARD_ERR_INVALID_CTX; 1020 | } 1021 | 1022 | const char * 1023 | adp_cnt_errstr(int err) 1024 | { 1025 | static const char *msg[] = { 1026 | "No error", 1027 | "Invalid algorithm context", 1028 | "Merge bitmap failed", 1029 | NULL 1030 | }; 1031 | 1032 | if (-err >= 0 && -err < (int)(sizeof(msg) / sizeof(msg[0]) - 1)) { 1033 | return msg[-err]; 1034 | } 1035 | 1036 | return "Invalid error number"; 1037 | } 1038 | 1039 | /* vi:ft=c ts=4 sw=4 fdm=marker et 1040 | * */ 1041 | 1042 | -------------------------------------------------------------------------------- /src/hyperloglog_counting.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "murmurhash.h" 7 | #include "lookup3hash.h" 8 | #include "hyperloglog_counting.h" 9 | 10 | struct hll_cnt_ctx_s { 11 | int err; 12 | uint8_t log2m; 13 | uint32_t m; 14 | double alphaMM; 15 | uint8_t hf; 16 | uint8_t M[1]; 17 | }; 18 | 19 | static const double POW_2_32 = 4294967296.0; 20 | static const double NEGATIVE_POW_2_32 = -4294967296.0; 21 | 22 | static uint8_t num_of_trail_zeros(uint64_t i) 23 | { 24 | uint64_t y; 25 | uint8_t n = 63; 26 | 27 | if (i == 0) 28 | return 64; 29 | 30 | y = i << 32; if (y != 0) { n -= 32; i = y; } 31 | y = i << 16; if (y != 0) { n -= 16; i = y; } 32 | y = i << 8; if (y != 0) { n -= 8; i = y; } 33 | y = i << 4; if (y != 0) { n -= 4; i = y; } 34 | y = i << 2; if (y != 0) { n -= 2; i = y; } 35 | 36 | return n - (uint8_t)((i << 1) >> 63); 37 | } 38 | 39 | hll_cnt_ctx_t *hll_cnt_raw_init(const void *obuf, uint32_t len_or_k, uint8_t hf) 40 | { 41 | 
hll_cnt_ctx_t *ctx; 42 | uint8_t *buf = (uint8_t *)obuf; 43 | uint8_t log2m = buf ? num_of_trail_zeros(len_or_k) : len_or_k; 44 | uint32_t m = pow(2, log2m); 45 | 46 | if (len_or_k == 0) { 47 | // invalid buffer length or k 48 | return NULL; 49 | } 50 | 51 | if (buf) { 52 | // initial bitmap was given 53 | if (len_or_k != (uint32_t)(1 << log2m)) { 54 | // invalid buffer size, its length must be a power of 2 55 | return NULL; 56 | } 57 | 58 | ctx = (hll_cnt_ctx_t *)malloc(sizeof(hll_cnt_ctx_t) + m - 1); 59 | memcpy(ctx->M, buf, m); 60 | } else { 61 | // k was given 62 | ctx = (hll_cnt_ctx_t *)malloc(sizeof(hll_cnt_ctx_t) + m - 1); 63 | memset(ctx->M, 0, m); 64 | } 65 | ctx->err = CCARD_OK; 66 | ctx->log2m = log2m; 67 | ctx->m = m; 68 | ctx->hf = hf; 69 | 70 | /* 71 | * Description of the following magical numbers: 72 | * 73 | * In the HyperLogLog paper page 12-13, alphaMM is a_m*m^2, where: 74 | * 75 | * a_m := 1/(m*J_0(m)) 76 | * 77 | * Here J_s(m) is not the first-kind Bessel function, but defined as the 78 | * value of a special integrals: 79 | * 80 | * J_s(m) := integral(u^s*f(u)^m, u=0..inf) 81 | * 82 | * where f(u) := log_2((2+u)/(1+u)) 83 | * 84 | * After some deductions, we know that J_0(m) can be estimated by: 85 | * 86 | * J_0(m) ~= 2*ln(2)/m*(1+(3*ln(2)-1)/m) 87 | * 88 | * As 1/(2*ln(2)) ~= 0.72135, 3*ln(2)-1 ~= 1.0794, thus: 89 | * 90 | * a_m ~= 0.72135/(1+1.0794/m) 91 | * 92 | * When log_2(m)={4,5,6}, the corresponding a_m will be: 93 | * 94 | * a_16 ~= 0.72135/(1+1.0794/16) = 0.67576 95 | * a_32 ~= 0.72135/(1+1.0794/32) = 0.69781 96 | * a_64 ~= 0.72135/(1+1.0794/64) = 0.70939 97 | * 98 | * There're small errors between calculated and actually used values, 99 | * because stream-lib copied those values from the pseudo code in page 14 100 | * directly. We had to keep compatibility with stream-lib so can't correct 101 | * these values. 
102 | **/ 103 | switch (log2m) { 104 | case 4: 105 | ctx->alphaMM = 0.673 * m * m; 106 | break; 107 | case 5: 108 | ctx->alphaMM = 0.697 * m * m; 109 | break; 110 | case 6: 111 | ctx->alphaMM = 0.709 * m * m; 112 | break; 113 | default: 114 | ctx->alphaMM = (0.7213 / (1 + 1.079 / m)) * m * m; 115 | } 116 | 117 | return ctx; 118 | } 119 | 120 | hll_cnt_ctx_t *hll_cnt_init(const void *obuf, uint32_t len_or_k, uint8_t hf) 121 | { 122 | uint8_t *buf = (uint8_t *)obuf; 123 | 124 | if (buf) { 125 | // initial bitmap was given 126 | if(len_or_k <= 3) { 127 | return NULL; 128 | } 129 | 130 | uint32_t data_segment_size = len_or_k - 3; 131 | uint8_t log2m = num_of_trail_zeros(data_segment_size); 132 | 133 | if (buf[0] != CCARD_ALGO_HYPERLOGLOG || 134 | buf[1] != hf || 135 | buf[2] != log2m) { 136 | 137 | // counting algorithm, hash function or length not match 138 | return NULL; 139 | } 140 | 141 | return hll_cnt_raw_init(buf + 3, data_segment_size, hf); 142 | } 143 | 144 | return hll_cnt_raw_init(NULL, len_or_k, hf); 145 | } 146 | 147 | int64_t hll_cnt_card(hll_cnt_ctx_t *ctx) 148 | { 149 | double sum = 0, estimate, zeros = 0; 150 | uint32_t j, z; 151 | 152 | if (!ctx) { 153 | return -1; 154 | } 155 | ctx->err = CCARD_OK; 156 | 157 | for (j = 0; j < ctx->m; j++) { 158 | sum += pow(2, (-1 * ctx->M[j])); 159 | } 160 | 161 | estimate = ctx->alphaMM * (1 / sum); 162 | 163 | if (estimate <= (5.0 / 2.0) * ctx->m) { 164 | /* 165 | * Small range correction: 166 | * Empty buckets may be too many, using linear counting estimator 167 | * instead. 
168 | * */ 169 | for (z = 0; z < ctx->m; z++) { 170 | if (ctx->M[z] == 0) { 171 | zeros++; 172 | } 173 | } 174 | return (int64_t)round(ctx->m * log(ctx->m / zeros)); 175 | } else if (estimate <= (1.0 / 30.0) * POW_2_32) { 176 | /* Intermediate range - no correction */ 177 | return (int64_t)round(estimate); 178 | } else { 179 | /* Large range correction */ 180 | return (int64_t)round((NEGATIVE_POW_2_32 * log(1.0 - (estimate / POW_2_32)))); 181 | } 182 | } 183 | 184 | int hll_cnt_offer(hll_cnt_ctx_t *ctx, const void *buf, uint32_t len) 185 | { 186 | int modified = 0; 187 | uint64_t x, j; 188 | uint8_t r, hl; 189 | 190 | if (!ctx) { 191 | return -1; 192 | } 193 | 194 | switch (ctx->hf) { 195 | case CCARD_HASH_LOOKUP3: 196 | x = lookup3ycs64_2((const char *)buf); 197 | hl = 64; 198 | break; 199 | case CCARD_HASH_MURMUR64: 200 | x = (uint64_t)murmurhash64_no_seed((void *)buf, len); 201 | hl = 64; 202 | break; 203 | case CCARD_HASH_MURMUR: 204 | default: 205 | /* default to use murmurhash function */ 206 | x = (uint64_t)murmurhash((void *)buf, len, -1); 207 | hl = 32; 208 | } 209 | 210 | j = x >> (hl - ctx->log2m); 211 | r = (uint8_t)(num_of_trail_zeros(x << (ctx->log2m + 64 - hl)) - (ctx->log2m + 64 - hl) + 1); 212 | if (ctx->M[j] < r) { 213 | ctx->M[j] = r; 214 | 215 | modified = 1; 216 | } 217 | 218 | ctx->err = CCARD_OK; 219 | return modified; 220 | } 221 | 222 | int hll_cnt_get_raw_bytes(hll_cnt_ctx_t *ctx, void *buf, uint32_t *len) 223 | { 224 | uint8_t *out = (uint8_t *)buf; 225 | 226 | if (!ctx || !len || (buf && *len < ctx->m)) { 227 | return -1; 228 | } 229 | 230 | if (out) { 231 | memcpy(out, ctx->M, ctx->m); 232 | } 233 | *len = ctx->m; 234 | 235 | return 0; 236 | } 237 | 238 | int hll_cnt_get_bytes(hll_cnt_ctx_t *ctx, void *buf, uint32_t *len) 239 | { 240 | /* 241 | +--------------+---------+------------------------------+-----------+ 242 | | algorithm[1] | hash[1] | bitmap length(base-2 log)[1] | bitmap[n] | 243 | 
+--------------+---------+------------------------------+-----------+ 244 | */ 245 | uint8_t algo = CCARD_ALGO_HYPERLOGLOG; 246 | uint8_t *out = (uint8_t *)buf; 247 | 248 | if (!ctx || !len || (buf && *len < ctx->m + 3)) { 249 | return -1; 250 | } 251 | 252 | if (buf) { 253 | out[0] = algo; 254 | out[1] = ctx->hf; 255 | out[2] = ctx->log2m; 256 | memcpy(&out[3], ctx->M, ctx->m); 257 | } 258 | *len = ctx->m + 3; 259 | 260 | return 0; 261 | } 262 | 263 | int hll_cnt_merge(hll_cnt_ctx_t *ctx, hll_cnt_ctx_t *tbm, ...) 264 | { 265 | va_list vl; 266 | hll_cnt_ctx_t *bm; 267 | uint32_t i; 268 | 269 | if (!ctx) { 270 | return -1; 271 | } 272 | 273 | if (tbm) { 274 | /* Cannot merge bitmap of different sizes or different hash functions */ 275 | if ((tbm->m != ctx->m) || (tbm->hf != ctx->hf)) { 276 | ctx->err = CCARD_ERR_MERGE_FAILED; 277 | return -1; 278 | } 279 | 280 | for (i = 1; i < ctx->m; i++) { 281 | if (tbm->M[i] > ctx->M[i]) { 282 | ctx->M[i] = tbm->M[i]; 283 | } 284 | } 285 | 286 | va_start(vl, tbm); 287 | while ((bm = va_arg(vl, hll_cnt_ctx_t *)) != NULL) { 288 | if ((tbm->m != ctx->m) || (tbm->hf != ctx->hf)) { 289 | ctx->err = CCARD_ERR_MERGE_FAILED; 290 | return -1; 291 | } 292 | 293 | for (i = 1; i < ctx->m; i++) { 294 | if (bm->M[i] > ctx->M[i]) { 295 | ctx->M[i] = bm->M[i]; 296 | } 297 | } 298 | } 299 | va_end(vl); 300 | } 301 | 302 | ctx->err = CCARD_OK; 303 | return 0; 304 | } 305 | 306 | int hll_cnt_merge_raw_bytes(hll_cnt_ctx_t *ctx, const void *buf, uint32_t len, ...) 
307 | { 308 | va_list vl; 309 | uint8_t *in; 310 | hll_cnt_ctx_t *bctx; 311 | 312 | if (!ctx) { 313 | return -1; 314 | } 315 | 316 | if (buf) { 317 | in = (uint8_t *)buf; 318 | /* Cannot merge bitmap of different sizes */ 319 | if (ctx->m != len) { 320 | ctx->err = CCARD_ERR_MERGE_FAILED; 321 | return -1; 322 | } 323 | 324 | bctx = hll_cnt_raw_init(in, ctx->m, ctx->hf); 325 | hll_cnt_merge(ctx, bctx, NULL); 326 | hll_cnt_fini(bctx); 327 | 328 | va_start(vl, len); 329 | while ((in = (uint8_t *)va_arg(vl, const void *)) != NULL) { 330 | len = va_arg(vl, uint32_t); 331 | 332 | if (ctx->m != len) { 333 | ctx->err = CCARD_ERR_MERGE_FAILED; 334 | return -1; 335 | } 336 | 337 | bctx = hll_cnt_raw_init(in, ctx->m, ctx->hf); 338 | hll_cnt_merge(ctx, bctx, NULL); 339 | hll_cnt_fini(bctx); 340 | } 341 | va_end(vl); 342 | } 343 | 344 | ctx->err = CCARD_OK; 345 | return 0; 346 | } 347 | 348 | int hll_cnt_merge_bytes(hll_cnt_ctx_t *ctx, const void *buf, uint32_t len, ...) 349 | { 350 | va_list vl; 351 | uint8_t *in; 352 | hll_cnt_ctx_t *bctx; 353 | 354 | if (!ctx) { 355 | return -1; 356 | } 357 | 358 | if (buf) { 359 | in = (uint8_t *)buf; 360 | /* Cannot merge bitmap of different sizes, 361 | different hash functions or different algorithms */ 362 | if ((ctx->m + 3 != len) || 363 | (in[0] != CCARD_ALGO_HYPERLOGLOG) || 364 | (in[1] != ctx->hf)) { 365 | 366 | ctx->err = CCARD_ERR_MERGE_FAILED; 367 | return -1; 368 | } 369 | 370 | bctx = hll_cnt_init(in, len, ctx->hf); 371 | hll_cnt_merge(ctx, bctx, NULL); 372 | hll_cnt_fini(bctx); 373 | 374 | va_start(vl, len); 375 | while ((in = (uint8_t *)va_arg(vl, const void *)) != NULL) { 376 | len = va_arg(vl, uint32_t); 377 | 378 | if ((ctx->m + 3 != len) || 379 | (in[0] != CCARD_ALGO_HYPERLOGLOG) || 380 | (in[1] != ctx->hf)) { 381 | 382 | ctx->err = CCARD_ERR_MERGE_FAILED; 383 | return -1; 384 | } 385 | 386 | bctx = hll_cnt_init(in, len, ctx->hf); 387 | hll_cnt_merge(ctx, bctx, NULL); 388 | hll_cnt_fini(bctx); 389 | } 390 | va_end(vl); 
391 | } 392 | 393 | ctx->err = CCARD_OK; 394 | return 0; 395 | } 396 | 397 | int hll_cnt_reset(hll_cnt_ctx_t *ctx) 398 | { 399 | if (!ctx) { 400 | return -1; 401 | } 402 | 403 | ctx->err = CCARD_OK; 404 | memset(ctx->M, 0, ctx->m); 405 | 406 | return 0; 407 | } 408 | 409 | int hll_cnt_fini(hll_cnt_ctx_t *ctx) 410 | { 411 | if (ctx) { 412 | free(ctx); 413 | return 0; 414 | } 415 | 416 | return -1; 417 | } 418 | 419 | int hll_cnt_errnum(hll_cnt_ctx_t *ctx) 420 | { 421 | if (ctx) { 422 | return ctx->err; 423 | } 424 | 425 | return CCARD_ERR_INVALID_CTX; 426 | } 427 | 428 | const char *hll_cnt_errstr(int err) 429 | { 430 | static const char *msg[] = { 431 | "No error", 432 | "Invalid algorithm context", 433 | "Merge bitmap failed", 434 | NULL 435 | }; 436 | 437 | if (-err >= 0 && -err < (int)(sizeof(msg) / sizeof(msg[0]) - 1)) { 438 | return msg[-err]; 439 | } 440 | 441 | return "Invalid error number"; 442 | } 443 | 444 | // vi:ft=c ts=4 sw=4 fdm=marker et 445 | -------------------------------------------------------------------------------- /src/linear_counting.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "murmurhash.h" 7 | #include "lookup3hash.h" 8 | #include "linear_counting.h" 9 | 10 | struct lnr_cnt_ctx_s { 11 | int err; 12 | uint32_t m; 13 | uint32_t length; 14 | uint32_t count; 15 | uint8_t hf; 16 | uint8_t M[1]; 17 | }; 18 | 19 | static uint8_t count_ones(uint8_t b) 20 | { 21 | uint8_t ones = 0; 22 | 23 | while (b > 0) { 24 | ones += (b & 0x01); 25 | b >>= 1; 26 | } 27 | 28 | return ones; 29 | } 30 | 31 | static uint8_t calc_log2m(uint32_t m) 32 | { 33 | uint8_t log2m = 0; 34 | 35 | while((m & 0x01) == 0) { 36 | log2m++; 37 | m >>= 1; 38 | } 39 | 40 | return log2m; 41 | } 42 | 43 | lnr_cnt_ctx_t *lnr_cnt_raw_init(const void *obuf, uint32_t len_or_k, uint8_t hf) 44 | { 45 | lnr_cnt_ctx_t *ctx; 46 | uint8_t *buf = (uint8_t *)obuf; 47 | uint32_t i; 
48 | 49 | if (len_or_k == 0) { 50 | // invalid buffer length or k 51 | return NULL; 52 | } 53 | 54 | if (buf) { 55 | // initial bitmap was given 56 | if ((len_or_k & (len_or_k - 1)) != 0) { 57 | // invalid buffer size, its length must be a power of 2 58 | return NULL; 59 | } 60 | 61 | ctx = (lnr_cnt_ctx_t *)malloc(sizeof(lnr_cnt_ctx_t) + len_or_k - 1); 62 | ctx->m = len_or_k; 63 | ctx->length = 8 * ctx->m; 64 | ctx->count = ctx->length; 65 | memcpy(ctx->M, buf, len_or_k); 66 | 67 | for (i = 0; i < len_or_k; i++) { 68 | ctx->count -= count_ones(ctx->M[i]); 69 | } 70 | } else { 71 | // k was given 72 | ctx = (lnr_cnt_ctx_t *)malloc(sizeof(lnr_cnt_ctx_t) + (1 << len_or_k) - 1); 73 | ctx->m = (1 << len_or_k); 74 | ctx->length = 8 * ctx->m; 75 | ctx->count = ctx->length; 76 | memset(ctx->M, 0, ctx->m); 77 | } 78 | ctx->err = CCARD_OK; 79 | ctx->hf = hf; 80 | 81 | return ctx; 82 | } 83 | 84 | lnr_cnt_ctx_t *lnr_cnt_init(const void *obuf, uint32_t len_or_k, uint8_t hf) 85 | { 86 | uint8_t *buf = (uint8_t *)obuf; 87 | 88 | if (buf) { 89 | // initial bitmap was given 90 | if(len_or_k <= 3) { 91 | return NULL; 92 | } 93 | 94 | uint32_t data_segment_size = len_or_k - 3; 95 | uint8_t log2m = calc_log2m(data_segment_size); 96 | 97 | if (buf[0] != CCARD_ALGO_LINEAR || 98 | buf[1] != hf || 99 | buf[2] != log2m) { 100 | 101 | // counting algorithm, hash function or length not match 102 | return NULL; 103 | } 104 | 105 | return lnr_cnt_raw_init(buf + 3, data_segment_size, hf); 106 | } 107 | 108 | return lnr_cnt_raw_init(NULL, len_or_k, hf); 109 | } 110 | 111 | int64_t lnr_cnt_card(lnr_cnt_ctx_t *ctx) 112 | { 113 | if (!ctx) { 114 | return -1; 115 | } 116 | 117 | return (int64_t)round(ctx->length * (log(ctx->length / (double)ctx->count))); 118 | } 119 | 120 | int lnr_cnt_offer(lnr_cnt_ctx_t *ctx, const void *buf, uint32_t len) 121 | { 122 | int modified = 0; 123 | uint64_t hash; 124 | uint32_t bit, i; 125 | uint8_t b, mask; 126 | 127 | if (!ctx) { 128 | return -1; 129 | } 130 | 131 
| switch (ctx->hf) { 132 | case CCARD_HASH_LOOKUP3: 133 | hash = lookup3ycs64_2((const char *)buf); 134 | break; 135 | case CCARD_HASH_MURMUR: 136 | default: 137 | /* default to use murmurhash function */ 138 | hash = (uint64_t)murmurhash((void *)buf, len, -1); 139 | } 140 | 141 | bit = (uint32_t)((hash & 0xFFFFFFFF) % (uint64_t)ctx->length); 142 | i = bit / 8; 143 | b = ctx->M[i]; 144 | mask = (uint8_t)(1 << (bit % 8)); 145 | if ((mask & b) == 0) { 146 | ctx->M[i] = (uint8_t)(b | mask); 147 | ctx->count--; 148 | modified = 1; 149 | } 150 | 151 | ctx->err = CCARD_OK; 152 | return modified; 153 | } 154 | 155 | int lnr_cnt_get_raw_bytes(lnr_cnt_ctx_t *ctx, void *buf, uint32_t *len) 156 | { 157 | uint8_t *out = (uint8_t *)buf; 158 | 159 | if (!ctx || !len || (buf && *len < ctx->m + 3)) { 160 | return -1; 161 | } 162 | 163 | if (buf) { 164 | memcpy(out, ctx->M, ctx->m); 165 | } 166 | *len = ctx->m; 167 | 168 | return 0; 169 | } 170 | 171 | int lnr_cnt_get_bytes(lnr_cnt_ctx_t *ctx, void *buf, uint32_t *len) 172 | { 173 | /* 174 | +--------------+---------+------------------------------+-----------+ 175 | | algorithm[1] | hash[1] | bitmap length(base-2 log)[1] | bitmap[n] | 176 | +--------------+---------+------------------------------+-----------+ 177 | */ 178 | uint8_t algo = CCARD_ALGO_LINEAR; 179 | uint8_t *out = (uint8_t *)buf; 180 | uint8_t log2m = calc_log2m(ctx->m); 181 | 182 | if (!ctx || !len || (buf && *len < ctx->m + 3)) { 183 | return -1; 184 | } 185 | 186 | if (buf) { 187 | out[0] = algo; 188 | out[1] = ctx->hf; 189 | out[2] = log2m; 190 | memcpy(&out[3], ctx->M, ctx->m); 191 | } 192 | *len = ctx->m + 3; 193 | 194 | return 0; 195 | } 196 | 197 | int lnr_cnt_merge(lnr_cnt_ctx_t *ctx, lnr_cnt_ctx_t *tbm, ...) 
198 | { 199 | va_list vl; 200 | lnr_cnt_ctx_t *bm; 201 | uint32_t i; 202 | 203 | if (!ctx) { 204 | return -1; 205 | } 206 | 207 | if (tbm) { 208 | /* Cannot merge bitmap of different sizes or different hash functions */ 209 | if ((tbm->m != ctx->m) || (tbm->hf != ctx->hf)) { 210 | ctx->err = CCARD_ERR_MERGE_FAILED; 211 | return -1; 212 | } 213 | 214 | for (i = 0; i < ctx->m; i++) { 215 | ctx->M[i] |= tbm->M[i]; 216 | } 217 | 218 | va_start(vl, tbm); 219 | while ((bm = va_arg(vl, lnr_cnt_ctx_t *)) != NULL) { 220 | if ((tbm->m != ctx->m) || (tbm->hf != ctx->hf)) { 221 | ctx->err = CCARD_ERR_MERGE_FAILED; 222 | return -1; 223 | } 224 | 225 | for (i = 1; i < ctx->m; i++) { 226 | ctx->M[i] |= tbm->M[i]; 227 | } 228 | } 229 | va_end(vl); 230 | 231 | ctx->count = ctx->length; 232 | for (i = 0; i < ctx->m; i++) { 233 | ctx->count -= count_ones(ctx->M[i]); 234 | } 235 | } 236 | 237 | ctx->err = CCARD_OK; 238 | return 0; 239 | } 240 | 241 | int lnr_cnt_merge_raw_bytes(lnr_cnt_ctx_t *ctx, const void *buf, uint32_t len, ...) 
242 | { 243 | va_list vl; 244 | uint8_t *in; 245 | lnr_cnt_ctx_t *bctx; 246 | 247 | if (!ctx) { 248 | return -1; 249 | } 250 | 251 | if (buf) { 252 | in = (uint8_t *)buf; 253 | /* Cannot merge bitmap of different sizes */ 254 | if (ctx->m != len) { 255 | ctx->err = CCARD_ERR_MERGE_FAILED; 256 | return -1; 257 | } 258 | 259 | bctx = lnr_cnt_raw_init(in, ctx->m, ctx->hf); 260 | lnr_cnt_merge(ctx, bctx, NULL); 261 | lnr_cnt_fini(bctx); 262 | 263 | va_start(vl, len); 264 | while ((in = (uint8_t *)va_arg(vl, const void *)) != NULL) { 265 | len = va_arg(vl, uint32_t); 266 | 267 | if (ctx->m != len) { 268 | ctx->err = CCARD_ERR_MERGE_FAILED; 269 | return -1; 270 | } 271 | 272 | bctx = lnr_cnt_raw_init(in, ctx->m, ctx->hf); 273 | lnr_cnt_merge(ctx, bctx, NULL); 274 | lnr_cnt_fini(bctx); 275 | } 276 | va_end(vl); 277 | } 278 | 279 | ctx->err = CCARD_OK; 280 | return 0; 281 | } 282 | 283 | int lnr_cnt_merge_bytes(lnr_cnt_ctx_t *ctx, const void *buf, uint32_t len, ...) 284 | { 285 | va_list vl; 286 | uint8_t *in; 287 | lnr_cnt_ctx_t *bctx; 288 | 289 | if (!ctx) { 290 | return -1; 291 | } 292 | 293 | if (buf) { 294 | in = (uint8_t *)buf; 295 | /* Cannot merge bitmap of different sizes, 296 | different hash functions or different algorithms */ 297 | if ((ctx->m + 3 != len) || 298 | (in[0] != CCARD_ALGO_LINEAR) || 299 | (in[1] != ctx->hf)) { 300 | 301 | ctx->err = CCARD_ERR_MERGE_FAILED; 302 | return -1; 303 | } 304 | 305 | bctx = lnr_cnt_init(in, len, ctx->hf); 306 | lnr_cnt_merge(ctx, bctx, NULL); 307 | lnr_cnt_fini(bctx); 308 | 309 | va_start(vl, len); 310 | while ((in = (uint8_t *)va_arg(vl, const void *)) != NULL) { 311 | len = va_arg(vl, uint32_t); 312 | 313 | if ((ctx->m + 3 != len) || 314 | (in[0] != CCARD_ALGO_LINEAR) || 315 | (in[1] != ctx->hf)) { 316 | 317 | ctx->err = CCARD_ERR_MERGE_FAILED; 318 | return -1; 319 | } 320 | 321 | bctx = lnr_cnt_init(in, len, ctx->hf); 322 | lnr_cnt_merge(ctx, bctx, NULL); 323 | lnr_cnt_fini(bctx); 324 | } 325 | va_end(vl); 326 | } 327 
| 328 | ctx->err = CCARD_OK; 329 | return 0; 330 | } 331 | 332 | int lnr_cnt_reset(lnr_cnt_ctx_t *ctx) 333 | { 334 | if (!ctx) { 335 | return -1; 336 | } 337 | 338 | ctx->count = ctx->length; 339 | ctx->err = CCARD_OK; 340 | memset(ctx->M, 0, ctx->m); 341 | 342 | return 0; 343 | } 344 | 345 | int lnr_cnt_fini(lnr_cnt_ctx_t *ctx) 346 | { 347 | if (ctx) { 348 | free(ctx); 349 | return 0; 350 | } 351 | 352 | return -1; 353 | } 354 | 355 | int lnr_cnt_errnum(lnr_cnt_ctx_t *ctx) 356 | { 357 | if (ctx) { 358 | return ctx->err; 359 | } 360 | 361 | return CCARD_ERR_INVALID_CTX; 362 | } 363 | 364 | const char *lnr_cnt_errstr(int err) 365 | { 366 | static const char *msg[] = { 367 | "No error", 368 | "Invalid algorithm context", 369 | "Merge bitmap failed", 370 | NULL 371 | }; 372 | 373 | if (-err >= 0 && -err < (int)(sizeof(msg) / sizeof(msg[0]) - 1)) { 374 | return msg[-err]; 375 | } 376 | 377 | return "Invalid error number"; 378 | } 379 | 380 | // vi:ft=c ts=4 sw=4 fdm=marker et 381 | 382 | -------------------------------------------------------------------------------- /src/lookup3hash.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "lookup3hash.h" 4 | 5 | uint32_t lookup3(const uint32_t *k, uint32_t offset, uint32_t length, uint32_t initval) 6 | { 7 | uint32_t a, b, c; 8 | uint32_t i = offset; 9 | a = b = c = 0xdeadbeef + (length << 2) + initval; 10 | 11 | while (length > 3) { 12 | a += k[i]; 13 | b += k[i + 1]; 14 | c += k[i + 2]; 15 | 16 | a -= c; a ^= (c << 4) | (c >> (0x1f & -4)); c += b; 17 | b -= a; b ^= (a << 6) | (a >> (0x1f & -6)); a += c; 18 | c -= b; c ^= (b << 8) | (b >> (0x1f & -8)); b += a; 19 | a -= c; a ^= (c << 16) | (c >> (0x1f & -16)); c += b; 20 | b -= a; b ^= (a << 19) | (a >> (0x1f & -19)); a += c; 21 | c -= b; c ^= (b << 4) | (b >> (0x1f & -4)); b += a; 22 | 23 | length -= 3; 24 | i += 3; 25 | } 26 | 27 | switch(length) { 28 | case 3 : c += k[i + 2]; // fall through 29 | 
case 2 : b += k[i + 1]; // fall through 30 | case 1 : a += k[i + 0]; // fall through 31 | c ^= b; c -= (b << 14) | (b >> (0x1f & -14)); 32 | a ^= c; a -= (c << 11) | (c >> (0x1f & -11)); 33 | b ^= a; b -= (a << 25) | (a >> (0x1f & -25)); 34 | c ^= b; c -= (b << 16) | (b >> (0x1f & -16)); 35 | a ^= c; a -= (c << 4) | (c >> (0x1f & -4)); 36 | b ^= a; b -= (a << 14) | (a >> (0x1f & -14)); 37 | c ^= b; c -= (b << 24) | (b >> (0x1f & -24)); 38 | case 0: 39 | break; 40 | } 41 | return c; 42 | } 43 | 44 | uint32_t lookup3ycs(const uint32_t *k, uint32_t offset, uint32_t length, uint32_t initval) 45 | { 46 | return lookup3(k, offset, length, initval - (length << 2)); 47 | } 48 | 49 | uint32_t lookup3ycs_2(const char *s, uint32_t start, uint32_t end, uint32_t initval) 50 | { 51 | uint32_t a, b, c; 52 | uint32_t i = start; 53 | a = b = c = 0xdeadbeef + initval; 54 | uint8_t mixed = 1; // have the 3 state variables been adequately mixed? 55 | 56 | for(;;) { 57 | if (i >= end) break; 58 | mixed = 0; 59 | char ch; 60 | ch = s[i++]; 61 | a += ch; 62 | if (i >= end) break; 63 | ch = s[i++]; 64 | b += ch; 65 | if (i >= end) break; 66 | ch = s[i++]; 67 | c += ch; 68 | if (i >= end) break; 69 | 70 | a -= c; a ^= (c << 4) | (c >> (0x1f & -4)); c += b; 71 | b -= a; b ^= (a << 6) | (a >> (0x1f & -6)); a += c; 72 | c -= b; c ^= (b << 8) | (b >> (0x1f & -8)); b += a; 73 | a -= c; a ^= (c << 16) | (c >> (0x1f & -16)); c += b; 74 | b -= a; b ^= (a << 19) | (a >> (0x1f & -19)); a += c; 75 | c -= b; c ^= (b << 4) | (b >> (0x1f & -4)); b += a; 76 | 77 | mixed = 1; 78 | } 79 | 80 | if (mixed == 0) { 81 | c ^= b; c -= (b << 14) | (b >> (0x1f & -14)); 82 | a ^= c; a -= (c << 11) | (c >> (0x1f & -11)); 83 | b ^= a; b -= (a << 25) | (a >> (0x1f & -25)); 84 | c ^= b; c -= (b << 16) | (b >> (0x1f & -16)); 85 | a ^= c; a -= (c << 4) | (c >> (0x1f & -4)); 86 | b ^= a; b -= (a << 14) | (a >> (0x1f & -14)); 87 | c ^= b; c -= (b << 24) | (b >> (0x1f & -24)); 88 | } 89 | 90 | return c; 91 | } 92 | 93 | 
uint64_t lookup3ycs64(const char *s, uint32_t start, uint32_t end, uint64_t initval) 94 | { 95 | uint32_t a, b, c; 96 | uint32_t i = start; 97 | a = b = c = 0xdeadbeef + (uint32_t)initval; 98 | c += (uint32_t)(initval >> 32); 99 | 100 | uint8_t mixed = 1; // have the 3 state variables been adequately mixed? 101 | for(;;) { 102 | if (i >= end) break; 103 | mixed = 0; 104 | char ch; 105 | ch = s[i++]; 106 | a += ch; 107 | if (i >= end) break; 108 | ch = s[i++]; 109 | b += ch; 110 | if (i >= end) break; 111 | ch = s[i++]; 112 | c += ch; 113 | if (i >= end) break; 114 | 115 | a -= c; a ^= ( c << 4) | (c >> (0x1f & -4)); c += b; 116 | b -= a; b ^= ( a << 6) | (a >> (0x1f & -6)); a += c; 117 | c -= b; c ^= ( b << 8) | (b >> (0x1f & -8)); b += a; 118 | a -= c; a ^= ( c << 16) | (c >> (0x1f & -16)); c += b; 119 | b -= a; b ^= ( a << 19) | (a >> (0x1f & -19)); a += c; 120 | c -= b; c ^= ( b << 4) | (b >> (0x1f & -4)); b += a; 121 | 122 | mixed = 1; 123 | } 124 | 125 | if (mixed == 0) { 126 | c ^= b; c -= (b << 14) | (b >> (0x1f & -14)); 127 | a ^= c; a -= (c << 11) | (c >> (0x1f & -11)); 128 | b ^= a; b -= (a << 25) | (a >> (0x1f & -25)); 129 | c ^= b; c -= (b << 16) | (b >> (0x1f & -16)); 130 | a ^= c; a -= (c << 4) | (c >> (0x1f & -4)); 131 | b ^= a; b -= (a << 14) | (a >> (0x1f & -14)); 132 | c ^= b; c -= (b << 24) | (b >> (0x1f & -24)); 133 | } 134 | 135 | return c + ((uint64_t)b << 32); 136 | } 137 | 138 | uint64_t lookup3ycs64_2(const char *s) 139 | { 140 | return lookup3ycs64(s, 0, strlen(s), -1); 141 | } 142 | 143 | // vi:ft=c ts=4 sw=4 fdm=marker et 144 | 145 | -------------------------------------------------------------------------------- /src/murmurhash.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "murmurhash.h" 3 | 4 | uint32_t murmurhash(void *buf, uint32_t len, uint32_t seed) 5 | { 6 | uint8_t *data = (uint8_t *)buf; 7 | uint32_t m = 0x5bd1e995; 8 | uint32_t r = 24; 9 | uint32_t h = seed ^ len; 
10 | uint32_t len_4 = len >> 2; 11 | uint32_t i; 12 | uint32_t len_m; 13 | uint32_t left; 14 | 15 | for(i = 0; i < len_4; i++) { 16 | uint32_t i_4 = i << 2; 17 | uint32_t k = data[i_4 + 3]; 18 | k <<= 8; 19 | k |= data[i_4 + 2] & 0xff; 20 | k <<= 8; 21 | k |= data[i_4 + 1] & 0xff; 22 | k <<= 8; 23 | k |= data[i_4 + 0] & 0xff; 24 | k *= m; 25 | k ^= k >> r; 26 | k *= m; 27 | h *= m; 28 | h ^= k; 29 | } 30 | 31 | // avoid calculating modulo 32 | len_m = len_4 << 2; 33 | left = len - len_m; 34 | 35 | if (left != 0) { 36 | if (left >= 3) { 37 | h ^= data[len - 3] << 16; 38 | } 39 | if (left >= 2) { 40 | h ^= data[len - 2] << 8; 41 | } 42 | if (left >= 1) { 43 | h ^= data[len - 1]; 44 | } 45 | h *= m; 46 | } 47 | 48 | h ^= h >> 13; 49 | h *= m; 50 | h ^= h >> 15; 51 | 52 | return h; 53 | } 54 | 55 | uint32_t murmurhash_long(uint64_t data) 56 | { 57 | uint32_t m = 0x5bd1e995; 58 | uint32_t r = 24; 59 | uint32_t h = 0; 60 | uint32_t k = (uint32_t)(data * m); 61 | 62 | k ^= k >> r; 63 | h ^= k * m; 64 | 65 | k = (data >> 32) * m; 66 | k ^= k >> r; 67 | h *= m; 68 | h ^= k * m; 69 | 70 | h ^= h >> 13; 71 | h *= m; 72 | h ^= h >> 15; 73 | 74 | return h; 75 | } 76 | 77 | uint64_t murmurhash64(void *buf, uint32_t len, uint32_t seed) 78 | { 79 | uint8_t *data = (uint8_t *)buf; 80 | uint64_t m = 0xc6a4a7935bd1e995L; 81 | uint32_t r = 47; 82 | uint64_t h = (seed & 0xffffffffl) ^ (len * m); 83 | uint32_t len8 = len / 8; 84 | uint32_t i; 85 | 86 | for (i = 0; i < len8; i++) { 87 | uint32_t i8 = i * 8; 88 | uint64_t k = ((uint64_t) data[i8 + 0] & 0xff) + (((uint64_t) data[i8 + 1] & 0xff) << 8) 89 | + (((uint64_t) data[i8 + 2] & 0xff) << 16) + (((uint64_t) data[i8 + 3] & 0xff) << 24) 90 | + (((uint64_t) data[i8 + 4] & 0xff) << 32) + (((uint64_t) data[i8 + 5] & 0xff) << 40) 91 | + (((uint64_t) data[i8 + 6] & 0xff) << 48) + (((uint64_t) data[i8 + 7] & 0xff) << 56); 92 | 93 | k *= m; 94 | k ^= k >> r; 95 | k *= m; 96 | 97 | h ^= k; 98 | h *= m; 99 | } 100 | 101 | switch (len % 8) { 102 
| case 7: 103 | h ^= (uint64_t) (data[(len & ~7) + 6] & 0xff) << 48; 104 | case 6: 105 | h ^= (uint64_t) (data[(len & ~7) + 5] & 0xff) << 40; 106 | case 5: 107 | h ^= (uint64_t) (data[(len & ~7) + 4] & 0xff) << 32; 108 | case 4: 109 | h ^= (uint64_t) (data[(len & ~7) + 3] & 0xff) << 24; 110 | case 3: 111 | h ^= (uint64_t) (data[(len & ~7) + 2] & 0xff) << 16; 112 | case 2: 113 | h ^= (uint64_t) (data[(len & ~7) + 1] & 0xff) << 8; 114 | case 1: 115 | h ^= (uint64_t) (data[len & ~7] & 0xff); 116 | h *= m; 117 | }; 118 | 119 | h ^= h >> r; 120 | h *= m; 121 | h ^= h >> r; 122 | 123 | return h; 124 | } 125 | 126 | uint64_t murmurhash64_no_seed(void *buf, uint32_t len) 127 | { 128 | return murmurhash64(buf, len, 0xe17a1465); 129 | } 130 | 131 | // vi:ft=c ts=4 sw=4 fdm=marker et 132 | 133 | -------------------------------------------------------------------------------- /src/register_set.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "register_set.h" 7 | 8 | static const uint32_t LOG2_BITS_PER_WORD = 6; 9 | 10 | static const uint32_t REGISTER_SIZE = 5; 11 | 12 | static uint32_t rs_getbits(uint32_t count) 13 | { 14 | return (uint32_t)floor(count / LOG2_BITS_PER_WORD); 15 | } 16 | 17 | reg_set_t *rs_init(uint32_t count, uint32_t *values, uint32_t len) 18 | { 19 | uint32_t bits = rs_getbits(count); 20 | reg_set_t *rs; 21 | 22 | if (values) { 23 | rs = (reg_set_t *)malloc(sizeof(reg_set_t) + sizeof(uint32_t) * len - 1); 24 | memcpy(rs->M, values, len); 25 | rs->size = len; 26 | } else { 27 | if (bits == 0) { 28 | rs = (reg_set_t *)malloc(sizeof(reg_set_t)); 29 | memset(rs->M, 0, 1); 30 | rs->size = 1; 31 | } else if (bits % 32 == 0) { 32 | rs = (reg_set_t *)malloc(sizeof(reg_set_t) + sizeof(uint32_t) * bits - 1); 33 | memset(rs->M, 0, bits); 34 | rs->size = bits; 35 | } else { 36 | rs = (reg_set_t *)malloc(sizeof(reg_set_t) + sizeof(uint32_t) * bits); 37 | 
memset(rs->M, 0, bits + 1); 38 | rs->size = bits + 1; 39 | } 40 | } 41 | 42 | rs->count = count; 43 | 44 | return rs; 45 | } 46 | 47 | int rs_set(reg_set_t *rs, uint32_t pos, uint32_t value) 48 | { 49 | if (!rs) { 50 | return -1; 51 | } 52 | 53 | uint32_t bucket_pos = (uint32_t)floor(pos / LOG2_BITS_PER_WORD); 54 | uint32_t shift = REGISTER_SIZE * (pos - (bucket_pos * LOG2_BITS_PER_WORD)); 55 | rs->M[bucket_pos] = (rs->M[bucket_pos] & ~(0x1f << shift)) | (value << shift); 56 | 57 | return 0; 58 | } 59 | 60 | int rs_get(reg_set_t *rs, uint32_t pos, uint32_t *value) 61 | { 62 | if (!rs) { 63 | return -1; 64 | } 65 | 66 | uint32_t bucket_pos = (uint32_t)floor(pos / LOG2_BITS_PER_WORD); 67 | uint32_t shift = REGISTER_SIZE * (pos - (bucket_pos * LOG2_BITS_PER_WORD)); 68 | *value = (rs->M[bucket_pos] & (0x1f << shift)) >> shift; 69 | 70 | return 0; 71 | } 72 | 73 | int rs_bits(reg_set_t *rs, uint32_t *bits, uint32_t *len) 74 | { 75 | if (!rs || !bits || (*len < rs->size)) { 76 | return -1; 77 | } 78 | 79 | memcpy(bits, rs->M, rs->size); 80 | *len = rs->size; 81 | 82 | return 0; 83 | } 84 | 85 | int rs_fini(reg_set_t *rs) 86 | { 87 | if (rs) { 88 | free(rs); 89 | return 0; 90 | } 91 | 92 | return -1; 93 | } 94 | 95 | // vi:ft=c ts=4 sw=4 fdm=marker et 96 | 97 | -------------------------------------------------------------------------------- /src/sparse_bitmap.c: -------------------------------------------------------------------------------- 1 | #include "sparse_bitmap.h" 2 | 3 | int sparse_bytes_to_int(const uint8_t *bmp, int off, int len) 4 | { 5 | int r = 0; 6 | int i; 7 | 8 | for(i = len - 1; i >= 0; i--) { 9 | r = (r << 8) | bmp[off + i]; 10 | } 11 | 12 | return r; 13 | } 14 | 15 | 16 | void sparse_int_to_bytes(uint8_t *bmp, int off, int len, int val) 17 | { 18 | int i; 19 | int tmp = val; 20 | 21 | for(i = 0; i < len; i++) { 22 | bmp[off + i] = tmp & 0xff; 23 | tmp >>= 8; 24 | } 25 | } 26 | 27 | 
--------------------------------------------------------------------------------
/t/SConscript:
--------------------------------------------------------------------------------
import os

# Build environment for the googletest-based unit tests; links against the
# library built in the parent directory.
env = Environment(
    LIBS = ['gtest', 'gtest_main', 'pthread', 'ccard-lib.0.1'],
    CPPPATH = ['../include'],
    LIBPATH = ['../', '/usr/lib', '/usr/lib64', '/usr/local/lib', '/usr/local/lib64'],
    RPATH = ['./', '../'],
    CCFLAGS = ['-Wall', '-Wextra', '-Werror', '-g3']
)
# Honour compiler overrides from the calling environment
env["CC"] = os.getenv("CC") or env["CC"]
env["CXX"] = os.getenv("CXX") or env["CXX"]
# Forward CCC_* variables to the build commands
env["ENV"].update(x for x in os.environ.items() if x[0].startswith("CCC_"))

p = env.Program('unittest', [Glob('*.cc')])
# `scons test` builds the test binary and runs it
env.Alias('test', p, env.Action(Dir('#').abspath+'/t/unittest --gtest_color=yes'))
env.AlwaysBuild('test')

# vi:ft=python ts=4 sw=4 et fdm=marker

--------------------------------------------------------------------------------
/t/adaptive_counting_unittest.cc:
--------------------------------------------------------------------------------
#include "ccard_common.h"
#include "adaptive_counting.h"
#include "gtest/gtest.h"

/**
 * Tests estimate.
 *
 * <p>
 * From 1 to 500000 distinct elements print estimated value every 50000.
 * Use AdaptiveCounting and LogLogCounting algorithm
 * </p>
 *
 * Contexts are created with adp_cnt_raw_init().
 * */
TEST(AdaptiveCounting, RawCounting)
{
    int rc;
    int64_t i, esti;
    adp_cnt_ctx_t *ctx1 = adp_cnt_raw_init(NULL, 16, CCARD_HASH_LOOKUP3);
    EXPECT_NE(ctx1, (adp_cnt_ctx_t *)NULL);
    adp_cnt_ctx_t *ctx2 = adp_cnt_raw_init(NULL, 16, CCARD_HASH_MURMUR);
    EXPECT_NE(ctx2, (adp_cnt_ctx_t *)NULL);

    printf("Adaptive Counting with Lookup3hash:\n");
    for (i = 1; i <= 500000L; i++) {
        rc = adp_cnt_offer(ctx1, &i, sizeof(int64_t));
        EXPECT_GE(rc, 0);

        if (i % 50000 == 0) {
            esti = adp_cnt_card(ctx1);
            EXPECT_GT(esti, 0);
            printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n",
                   (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100);
        }
    }
    printf("\n");

    // start over with an empty context for the LogLog estimator
    rc = adp_cnt_reset(ctx1);
    EXPECT_EQ(rc, 0);

    printf("Loglog Counting with Lookup3hash:\n");
    for (i = 1; i <= 500000L; i++) {
        rc = adp_cnt_offer(ctx1, &i, sizeof(int64_t));
        EXPECT_GE(rc, 0);

        if (i % 50000 == 0) {
            esti = adp_cnt_card_loglog(ctx1);
            EXPECT_GT(esti, 0);
            printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n",
                   (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100);
        }
    }
    printf("\n");

    printf("Adaptive Counting with Murmurhash:\n");
    for (i = 1; i <= 500000L; i++) {
        rc = adp_cnt_offer(ctx2, &i, sizeof(int64_t));
        EXPECT_GE(rc, 0);

        if (i % 50000 == 0) {
            esti = adp_cnt_card(ctx2);
            EXPECT_GT(esti, 0);
            printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n",
                   (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100);
        }
    }
    printf("\n");

    // start over with an empty context for the LogLog estimator
    rc = adp_cnt_reset(ctx2);
    EXPECT_EQ(rc, 0);

    printf("Loglog Counting with Murmurhash:\n");
    for (i = 1; i <= 500000L; i++) {
        rc = adp_cnt_offer(ctx2, &i, sizeof(int64_t));
        EXPECT_GE(rc, 0);

        if (i % 50000 == 0) {
            esti = adp_cnt_card_loglog(ctx2);
            EXPECT_GT(esti, 0);
            printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n",
                   (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100);
        }
    }
    printf("\n");

    rc = adp_cnt_fini(ctx2);
    EXPECT_EQ(rc, 0);
    rc = adp_cnt_fini(ctx1);
    EXPECT_EQ(rc, 0);
}

/**
 * Tests estimate.
 *
 * <p>
 * From 1 to 500000 distinct elements print estimated value every 50000.
 * Use AdaptiveCounting and LogLogCounting algorithm
 * </p>
 *
 * Same scenario as RawCounting, with contexts created by adp_cnt_init().
 * */
TEST(AdaptiveCounting, Counting)
{
    int rc;
    int64_t i, esti;
    adp_cnt_ctx_t *ctx1 = adp_cnt_init(NULL, 16, CCARD_HASH_LOOKUP3);
    EXPECT_NE(ctx1, (adp_cnt_ctx_t *)NULL);
    adp_cnt_ctx_t *ctx2 = adp_cnt_init(NULL, 16, CCARD_HASH_MURMUR);
    EXPECT_NE(ctx2, (adp_cnt_ctx_t *)NULL);

    printf("Adaptive Counting with Lookup3hash:\n");
    for (i = 1; i <= 500000L; i++) {
        rc = adp_cnt_offer(ctx1, &i, sizeof(int64_t));
        EXPECT_GE(rc, 0);

        if (i % 50000 == 0) {
            esti = adp_cnt_card(ctx1);
            EXPECT_GT(esti, 0);
            printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n",
                   (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100);
        }
    }
    printf("\n");

    // start over with an empty context for the LogLog estimator
    rc = adp_cnt_reset(ctx1);
    EXPECT_EQ(rc, 0);

    printf("Loglog Counting with Lookup3hash:\n");
    for (i = 1; i <= 500000L; i++) {
        rc = adp_cnt_offer(ctx1, &i, sizeof(int64_t));
        EXPECT_GE(rc, 0);

        if (i % 50000 == 0) {
            esti = adp_cnt_card_loglog(ctx1);
            EXPECT_GT(esti, 0);
            printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n",
                   (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100);
        }
    }
    printf("\n");

    printf("Adaptive Counting with Murmurhash:\n");
    for (i = 1; i <= 500000L; i++) {
        rc = adp_cnt_offer(ctx2, &i, sizeof(int64_t));
        EXPECT_GE(rc, 0);

        if (i % 50000 == 0) {
            esti = adp_cnt_card(ctx2);
            EXPECT_GT(esti, 0);
            printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n",
                   (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100);
        }
    }
    printf("\n");

    // start over with an empty context for the LogLog estimator
    rc = adp_cnt_reset(ctx2);
    EXPECT_EQ(rc, 0);

    printf("Loglog Counting with Murmurhash:\n");
    for (i = 1; i <= 500000L; i++) {
        rc = adp_cnt_offer(ctx2, &i, sizeof(int64_t));
        EXPECT_GE(rc, 0);

        if (i % 50000 == 0) {
            esti = adp_cnt_card_loglog(ctx2);
            EXPECT_GT(esti, 0);
            printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n",
                   (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100);
        }
    }
    printf("\n");

    rc = adp_cnt_fini(ctx2);
    EXPECT_EQ(rc, 0);
    rc = adp_cnt_fini(ctx1);
    EXPECT_EQ(rc, 0);
}

/**
 * Tests serialize, unserialize and merge.
 *
 * <ol>
 * <li>Current context contains 1 to 20000</li>
 * <li>Tbm1 that contains 10000 to 30000 be serialized as buf1</li>
 * <li>Tbm2 that contains 20000 to 40000 be serialized as buf2</li>
 * <li>Merges buf1 and buf2 into current context</li>
 * </ol>
 * */
TEST(AdaptiveCounting, RawMerge)
{
    int rc;
    int64_t i, esti;
    adp_cnt_ctx_t *ctx = adp_cnt_raw_init(NULL, 16, CCARD_HASH_LOOKUP3);
    EXPECT_NE(ctx, (adp_cnt_ctx_t *)NULL);
    adp_cnt_ctx_t *tbm1 = adp_cnt_raw_init(NULL, 16, CCARD_HASH_LOOKUP3);
    EXPECT_NE(tbm1, (adp_cnt_ctx_t *)NULL);
    adp_cnt_ctx_t *tbm2 = adp_cnt_raw_init(NULL, 16, CCARD_HASH_LOOKUP3);
    EXPECT_NE(tbm2, (adp_cnt_ctx_t *)NULL);
    // raw dump size for k = 16: one byte per bucket, 2^16 buckets
    int32_t m = 1 << 16;
    uint8_t buf1[m], buf2[m];
    uint32_t len1 = m, len2 = m;

    for (i = 1; i <= 20000L; i++) {
        rc = adp_cnt_offer(ctx, &i, sizeof(uint64_t));
        EXPECT_GE(rc, 0);
    }
    for (i = 10000L; i <= 30000L; i++) {
        rc = adp_cnt_offer(tbm1, &i, sizeof(uint64_t));
        EXPECT_GE(rc, 0);
    }
    rc = adp_cnt_get_raw_bytes(tbm1, buf1, &len1);
    EXPECT_EQ(rc, 0);
    for (i = 20000L; i <= 40000L; i++) {
        rc = adp_cnt_offer(tbm2, &i, sizeof(uint64_t));
        EXPECT_GE(rc, 0);
    }
    rc = adp_cnt_get_raw_bytes(tbm2, buf2, &len2);
    EXPECT_EQ(rc, 0);

    // the union of the three ranges is 1..40000
    rc = adp_cnt_merge_raw_bytes(ctx, buf1, len1, buf2, len2, NULL);
    EXPECT_EQ(rc, 0);
    esti = adp_cnt_card(ctx);
    EXPECT_GT(esti, 30000);
    printf("actual:40000, estimated: %9lu, error: %+7.2f%%\n",
           (long unsigned int)esti, (double)(esti - 40000) / 40000 * 100);

    rc = adp_cnt_fini(tbm2);
    EXPECT_EQ(rc, 0);
    rc = adp_cnt_fini(tbm1);
    EXPECT_EQ(rc, 0);
    rc = adp_cnt_fini(ctx);
    EXPECT_EQ(rc, 0);
}

/**
 * Tests serialize, unserialize and merge.
 *
 *
    235 | *
  1. Current context contains 1 to 20000
  2. 236 | *
  3. Tbm1 that contains 10000 to 30000 be serialized as buf1
  4. 237 | *
  5. Tbm2 that contains 20000 to 40000 be serialized as buf2
  6. 238 | *
  7. Merges buf1 and buf2 into current context
  8. 239 | *
240 | * */ 241 | TEST(AdaptiveCounting, Merge) 242 | { 243 | int rc; 244 | int64_t i, esti; 245 | adp_cnt_ctx_t *ctx = adp_cnt_init(NULL, 16, CCARD_HASH_LOOKUP3); 246 | EXPECT_NE(ctx, (adp_cnt_ctx_t *)NULL); 247 | adp_cnt_ctx_t *tbm1 = adp_cnt_init(NULL, 16, CCARD_HASH_LOOKUP3); 248 | EXPECT_NE(tbm1, (adp_cnt_ctx_t *)NULL); 249 | adp_cnt_ctx_t *tbm2 = adp_cnt_init(NULL, 16, CCARD_HASH_LOOKUP3); 250 | EXPECT_NE(tbm2, (adp_cnt_ctx_t *)NULL); 251 | int32_t m = 1 << 16; 252 | uint8_t buf1[m + 3], buf2[m + 3]; 253 | uint32_t len1 = m + 3, len2 = m + 3; 254 | 255 | for (i = 1; i <= 20000L; i++) { 256 | rc = adp_cnt_offer(ctx, &i, sizeof(uint64_t)); 257 | EXPECT_GE(rc, 0); 258 | } 259 | for (i = 10000L; i <= 30000L; i++) { 260 | rc = adp_cnt_offer(tbm1, &i, sizeof(uint64_t)); 261 | EXPECT_GE(rc, 0); 262 | } 263 | rc = adp_cnt_get_bytes(tbm1, buf1, &len1); 264 | EXPECT_EQ(rc, 0); 265 | for (i = 20000L; i <= 40000L; i++) { 266 | rc = adp_cnt_offer(tbm2, &i, sizeof(uint64_t)); 267 | EXPECT_GE(rc, 0); 268 | } 269 | rc = adp_cnt_get_bytes(tbm2, buf2, &len2); 270 | EXPECT_EQ(rc, 0); 271 | 272 | rc = adp_cnt_merge_bytes(ctx, buf1, len1, buf2, len2, NULL); 273 | EXPECT_EQ(rc, 0); 274 | esti = adp_cnt_card(ctx); 275 | EXPECT_GT(esti, 30000); 276 | printf("actual:40000, estimated: %9lu, error: %+7.2f%%\n", 277 | (long unsigned int)esti, (double)(esti - 40000) / 40000 * 100); 278 | 279 | rc = adp_cnt_fini(tbm2); 280 | EXPECT_EQ(rc, 0); 281 | rc = adp_cnt_fini(tbm1); 282 | EXPECT_EQ(rc, 0); 283 | rc = adp_cnt_fini(ctx); 284 | EXPECT_EQ(rc, 0); 285 | } 286 | 287 | /** 288 | * Do some statistics on the bitmap buckets 289 | * */ 290 | TEST(AdaptiveCounting, BucketStats) 291 | { 292 | int rc; 293 | int64_t i, esti; 294 | uint32_t len; 295 | uint8_t *bytes; 296 | int k = 13; 297 | adp_cnt_ctx_t *ctx = adp_cnt_init(NULL, k, CCARD_HASH_MURMUR); 298 | EXPECT_NE(ctx, (adp_cnt_ctx_t *)NULL); 299 | for(i = 1; i <= 10000L; i++) { 300 | rc = adp_cnt_offer(ctx, &i, sizeof(i)); 301 | EXPECT_GE(rc, 
0); 302 | if (i % 100 == 0) { 303 | int ucnt = 0; 304 | int exp_size; 305 | esti = adp_cnt_card(ctx); 306 | EXPECT_GT(esti, 0); 307 | 308 | // Count used buckets in bitmap 309 | rc = adp_cnt_get_raw_bytes(ctx, NULL, &len); 310 | EXPECT_EQ(rc, 0); 311 | bytes = (uint8_t *)calloc(len, 1); 312 | rc = adp_cnt_get_raw_bytes(ctx, bytes, &len); 313 | EXPECT_EQ(rc, 0); 314 | for(int j = 0; j < (int)len; j++) { 315 | if(bytes[j] != 0) { 316 | ucnt++; 317 | } 318 | } 319 | free(bytes); 320 | 321 | // Calculate sparse storage costs: u*(log2(m)/8+1) 322 | exp_size = ((k + 7) / 8 + 1) * ucnt; 323 | printf("actual: %6lu, estimated: %6lu, error: %+6.2f%%, "\ 324 | "used buckets: %6d, used bucket ratio: %+6.2f%%, "\ 325 | "expect sparse storage: %6d, expect bmp ratio: %+6.2f%%\n", 326 | (long unsigned int)i, (long unsigned int)esti, 327 | (double)(esti - i) / i * 100, 328 | ucnt, (double)ucnt / len * 100, 329 | exp_size, (double)exp_size / len * 100); 330 | } 331 | } 332 | rc = adp_cnt_fini(ctx); 333 | EXPECT_EQ(rc, 0); 334 | } 335 | 336 | /** 337 | * Sanity tests on sparse bitmap 338 | * */ 339 | TEST(AdaptiveCounting, SparseSanity) 340 | { 341 | int rc; 342 | int k = 13; 343 | uint32_t len; 344 | int64_t i, esti; 345 | adp_cnt_ctx_t *ctx = adp_cnt_init(NULL, k, CCARD_HASH_MURMUR | CCARD_OPT_SPARSE); 346 | EXPECT_NE(ctx, (adp_cnt_ctx_t *)NULL); 347 | 348 | for(i = 1; i <= 5000L; i++) { 349 | rc = adp_cnt_offer(ctx, &i, sizeof(i)); 350 | EXPECT_GE(rc, 0); 351 | 352 | if(i % 100 == 0) { 353 | esti = adp_cnt_card(ctx); 354 | EXPECT_GT(esti, 0); 355 | 356 | rc = adp_cnt_get_raw_bytes(ctx, NULL, &len); 357 | EXPECT_EQ(rc, 0); 358 | EXPECT_LE(len, 1 << k); 359 | printf("actual: %6lu, estimated: %6lu, error: %+6.2f%%, "\ 360 | "storage size: %6d\n", 361 | (long unsigned int)i, (long unsigned int)esti, 362 | (double)(esti - i) / i * 100, 363 | len); 364 | } 365 | } 366 | 367 | rc = adp_cnt_fini(ctx); 368 | EXPECT_EQ(rc, 0); 369 | } 370 | 371 | /** 372 | * Merge sparse contexts only 373 | 
* 374 | *
    375 | *
  1. ctx1 contains 1 to 20
  2. 376 | *
  3. ctx2 contains 10 to 30
  4. 377 | *
  5. ctx3 contains 20 to 40
  6. 378 | *
  7. Merges ctx2 and ctx3 into ctx1
  8. 379 | *
380 | * */ 381 | TEST(AdaptiveCounting, SparseMergeCtx) 382 | { 383 | int rc; 384 | int k = 13; 385 | uint32_t len; 386 | int64_t i, esti; 387 | adp_cnt_ctx_t *ctx1, *ctx2, *ctx3; 388 | 389 | ctx1 = adp_cnt_init(NULL, k, CCARD_HASH_MURMUR | CCARD_OPT_SPARSE); 390 | for(i = 1; i <= 20; i++) { 391 | rc = adp_cnt_offer(ctx1, &i, sizeof(i)); 392 | EXPECT_GE(rc, 0); 393 | } 394 | 395 | ctx2 = adp_cnt_init(NULL, k, CCARD_HASH_MURMUR | CCARD_OPT_SPARSE); 396 | for(i = 10; i <= 30; i++) { 397 | rc = adp_cnt_offer(ctx2, &i, sizeof(i)); 398 | EXPECT_GE(rc, 0); 399 | } 400 | 401 | ctx3 = adp_cnt_init(NULL, k, CCARD_HASH_MURMUR | CCARD_OPT_SPARSE); 402 | for(i = 20; i <= 40; i++) { 403 | rc = adp_cnt_offer(ctx3, &i, sizeof(i)); 404 | EXPECT_GE(rc, 0); 405 | } 406 | 407 | rc = adp_cnt_merge(ctx1, ctx2, ctx3, NULL); 408 | EXPECT_GE(rc, 0); 409 | 410 | rc = adp_cnt_get_raw_bytes(ctx1, NULL, &len); 411 | EXPECT_GE(rc, 0); 412 | EXPECT_LT(len, 200); 413 | 414 | esti = adp_cnt_card(ctx1); 415 | EXPECT_GE(esti, 35); 416 | 417 | rc = adp_cnt_fini(ctx1); 418 | EXPECT_EQ(rc, 0); 419 | rc = adp_cnt_fini(ctx2); 420 | EXPECT_EQ(rc, 0); 421 | rc = adp_cnt_fini(ctx3); 422 | EXPECT_EQ(rc, 0); 423 | } 424 | 425 | /** 426 | * Merge sparse and normal contexts 427 | * 428 | *
    429 | *
  1. ctx1 contains 1 to 20
  2. 430 | *
  3. ctx2 contains 10 to 30000
  4. 431 | *
  5. ctx3 contains 20 to 40
  6. 432 | *
  7. Merges ctx2 and ctx3 into ctx1
  8. 433 | *
434 | * */ 435 | TEST(AdaptiveCounting, SparseNormalMergeCtx) 436 | { 437 | int rc; 438 | int k = 13; 439 | uint32_t len; 440 | int64_t i, esti; 441 | adp_cnt_ctx_t *ctx1, *ctx2, *ctx3; 442 | 443 | ctx1 = adp_cnt_init(NULL, k, CCARD_HASH_MURMUR | CCARD_OPT_SPARSE); 444 | for(i = 1; i <= 20; i++) { 445 | rc = adp_cnt_offer(ctx1, &i, sizeof(i)); 446 | EXPECT_GE(rc, 0); 447 | } 448 | 449 | ctx2 = adp_cnt_init(NULL, k, CCARD_HASH_MURMUR | CCARD_OPT_SPARSE); 450 | for(i = 10; i <= 30000; i++) { 451 | rc = adp_cnt_offer(ctx2, &i, sizeof(i)); 452 | EXPECT_GE(rc, 0); 453 | } 454 | 455 | ctx3 = adp_cnt_init(NULL, k, CCARD_HASH_MURMUR | CCARD_OPT_SPARSE); 456 | for(i = 20; i <= 40; i++) { 457 | rc = adp_cnt_offer(ctx3, &i, sizeof(i)); 458 | EXPECT_GE(rc, 0); 459 | } 460 | 461 | rc = adp_cnt_merge(ctx1, ctx2, ctx3, NULL); 462 | EXPECT_GE(rc, 0); 463 | 464 | rc = adp_cnt_get_raw_bytes(ctx1, NULL, &len); 465 | EXPECT_GE(rc, 0); 466 | EXPECT_EQ(len, 1 << k); 467 | 468 | esti = adp_cnt_card(ctx1); 469 | EXPECT_GE(esti, 29000); 470 | 471 | rc = adp_cnt_fini(ctx1); 472 | EXPECT_EQ(rc, 0); 473 | rc = adp_cnt_fini(ctx2); 474 | EXPECT_EQ(rc, 0); 475 | rc = adp_cnt_fini(ctx3); 476 | EXPECT_EQ(rc, 0); 477 | } 478 | 479 | /** 480 | * Serialize & deserialize dense bitmap 481 | * */ 482 | TEST(AdaptiveCounting, Deserialize) 483 | { 484 | adp_cnt_ctx_t *ctx = adp_cnt_init(NULL, 16, CCARD_HASH_MURMUR); 485 | EXPECT_NE(ctx, (adp_cnt_ctx_t *)NULL); 486 | 487 | for(int i = 1; i < 100; i++) { 488 | adp_cnt_offer(ctx, &i, sizeof(i)); 489 | } 490 | int64_t esti = adp_cnt_card(ctx); 491 | 492 | uint32_t num_bytes = 0; 493 | EXPECT_EQ(adp_cnt_get_bytes(ctx, NULL, &num_bytes), 0); 494 | 495 | uint8_t buf[num_bytes]; 496 | EXPECT_EQ(adp_cnt_get_bytes(ctx, buf, &num_bytes), 0); 497 | 498 | adp_cnt_ctx_t *other = adp_cnt_init(buf, num_bytes, CCARD_HASH_MURMUR); 499 | EXPECT_NE(other, (adp_cnt_ctx_t *)NULL); 500 | EXPECT_EQ(adp_cnt_card(other), esti); 501 | } 502 | 503 | /** 504 | * Serialize & 
deserialize sparse bitmap 505 | * */ 506 | TEST(AdaptiveCounting, SparseDeserialize) 507 | { 508 | adp_cnt_ctx_t *ctx = adp_cnt_init(NULL, 16, CCARD_HASH_MURMUR | CCARD_OPT_SPARSE); 509 | EXPECT_NE(ctx, (adp_cnt_ctx_t *)NULL); 510 | 511 | for(int i = 1; i < 100; i++) { 512 | adp_cnt_offer(ctx, &i, sizeof(i)); 513 | } 514 | int64_t esti = adp_cnt_card(ctx); 515 | 516 | uint32_t num_bytes = 0; 517 | EXPECT_EQ(adp_cnt_get_bytes(ctx, NULL, &num_bytes), 0); 518 | 519 | uint8_t buf[num_bytes]; 520 | EXPECT_EQ(adp_cnt_get_bytes(ctx, buf, &num_bytes), 0); 521 | 522 | adp_cnt_ctx_t *other = adp_cnt_init(buf, num_bytes, CCARD_HASH_MURMUR | CCARD_OPT_SPARSE); 523 | EXPECT_NE(other, (adp_cnt_ctx_t *)NULL); 524 | EXPECT_EQ(adp_cnt_card(other), esti); 525 | } 526 | 527 | // vi:ft=c ts=4 sw=4 fdm=marker et 528 | 529 | -------------------------------------------------------------------------------- /t/hyperloglog_counting_unittest.cc: -------------------------------------------------------------------------------- 1 | #include "ccard_common.h" 2 | #include "hyperloglog_counting.h" 3 | #include "gtest/gtest.h" 4 | 5 | /** 6 | * Tests estimate. 7 | * 8 | *

9 | * From 1 to 500000 distinct elements print estimated value every 50000. 10 | * Use HyperloglogCounting algorithm 11 | *

12 | * */ 13 | TEST(HyperloglogCounting, RawCounting) 14 | { 15 | int rc; 16 | int64_t i, esti; 17 | hll_cnt_ctx_t *ctx1 = hll_cnt_raw_init(NULL, 16, CCARD_HASH_MURMUR); 18 | EXPECT_NE(ctx1, (hll_cnt_ctx_t *)NULL); 19 | hll_cnt_ctx_t *ctx2 = hll_cnt_raw_init(NULL, 16, CCARD_HASH_LOOKUP3); 20 | EXPECT_NE(ctx2, (hll_cnt_ctx_t *)NULL); 21 | 22 | printf("Hyperloglog Counting with Murmurhash:\n"); 23 | for (i = 1; i <= 500000L; i++) { 24 | rc = hll_cnt_offer(ctx1, &i, sizeof(int64_t)); 25 | EXPECT_GE(rc, 0); 26 | 27 | if (i % 50000 == 0) { 28 | esti = hll_cnt_card(ctx1); 29 | EXPECT_GT(esti, 0); 30 | printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n", 31 | (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100); 32 | } 33 | } 34 | printf("\n"); 35 | 36 | rc = hll_cnt_reset(ctx1); 37 | EXPECT_EQ(rc, 0); 38 | 39 | printf("Hyperloglog Counting with Lookup3hash:\n"); 40 | for (i = 1; i <= 500000L; i++) { 41 | rc = hll_cnt_offer(ctx2, &i, sizeof(int64_t)); 42 | EXPECT_GE(rc, 0); 43 | 44 | if (i % 50000 == 0) { 45 | esti = hll_cnt_card(ctx2); 46 | EXPECT_GT(esti, 0); 47 | printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n", 48 | (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100); 49 | } 50 | } 51 | printf("\n"); 52 | 53 | rc = hll_cnt_reset(ctx2); 54 | EXPECT_EQ(rc, 0); 55 | 56 | rc = hll_cnt_fini(ctx2); 57 | EXPECT_EQ(rc, 0); 58 | rc = hll_cnt_fini(ctx1); 59 | EXPECT_EQ(rc, 0); 60 | } 61 | 62 | /** 63 | * Tests estimate. 64 | * 65 | *

66 | * From 1 to 500000 distinct elements print estimated value every 50000. 67 | * Use HyperloglogCounting algorithm 68 | *

69 | * */ 70 | TEST(HyperloglogCounting, Counting) 71 | { 72 | int rc; 73 | int64_t i, esti; 74 | hll_cnt_ctx_t *ctx1 = hll_cnt_init(NULL, 16, CCARD_HASH_MURMUR); 75 | EXPECT_NE(ctx1, (hll_cnt_ctx_t *)NULL); 76 | hll_cnt_ctx_t *ctx2 = hll_cnt_init(NULL, 16, CCARD_HASH_LOOKUP3); 77 | EXPECT_NE(ctx2, (hll_cnt_ctx_t *)NULL); 78 | 79 | printf("Hyperloglog Counting with Murmurhash:\n"); 80 | for (i = 1; i <= 500000L; i++) { 81 | rc = hll_cnt_offer(ctx1, &i, sizeof(int64_t)); 82 | EXPECT_GE(rc, 0); 83 | 84 | if (i % 50000 == 0) { 85 | esti = hll_cnt_card(ctx1); 86 | EXPECT_GT(esti, 0); 87 | printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n", 88 | (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100); 89 | } 90 | } 91 | printf("\n"); 92 | 93 | rc = hll_cnt_reset(ctx1); 94 | EXPECT_EQ(rc, 0); 95 | 96 | printf("Hyperloglog Counting with Lookup3hash:\n"); 97 | for (i = 1; i <= 500000L; i++) { 98 | rc = hll_cnt_offer(ctx2, &i, sizeof(int64_t)); 99 | EXPECT_GE(rc, 0); 100 | 101 | if (i % 50000 == 0) { 102 | esti = hll_cnt_card(ctx2); 103 | EXPECT_GT(esti, 0); 104 | printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n", 105 | (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100); 106 | } 107 | } 108 | printf("\n"); 109 | 110 | rc = hll_cnt_reset(ctx2); 111 | EXPECT_EQ(rc, 0); 112 | 113 | rc = hll_cnt_fini(ctx2); 114 | EXPECT_EQ(rc, 0); 115 | rc = hll_cnt_fini(ctx1); 116 | EXPECT_EQ(rc, 0); 117 | } 118 | 119 | /** 120 | * Tests serialize, unserialize and merge. 121 | * 122 | *
    123 | *
  1. Current context contains 1 to 20000
  2. 124 | *
  3. Tbm1 that contains 10000 to 30000 be serialized as buf1
  4. 125 | *
  5. Tbm2 that contains 20000 to 40000 be serialized as buf2
  6. 126 | *
  7. Merges buf1 and buf2 into current context
  8. 127 | *
128 | * */ 129 | TEST(HyperloglogCounting, RawMerge) 130 | { 131 | int rc; 132 | int64_t i, esti; 133 | hll_cnt_ctx_t *ctx = hll_cnt_raw_init(NULL, 16, CCARD_HASH_MURMUR); 134 | EXPECT_NE(ctx, (hll_cnt_ctx_t *)NULL); 135 | hll_cnt_ctx_t *tbm1 = hll_cnt_raw_init(NULL, 16, CCARD_HASH_MURMUR); 136 | EXPECT_NE(tbm1, (hll_cnt_ctx_t *)NULL); 137 | hll_cnt_ctx_t *tbm2 = hll_cnt_raw_init(NULL, 16, CCARD_HASH_MURMUR); 138 | EXPECT_NE(tbm2, (hll_cnt_ctx_t *)NULL); 139 | int32_t m = 1 << 16; 140 | uint8_t buf1[m + 3], buf2[m + 3]; 141 | uint32_t len1 = m + 3, len2 = m + 3; 142 | 143 | for (i = 1; i <= 20000L; i++) { 144 | rc = hll_cnt_offer(ctx, &i, sizeof(uint64_t)); 145 | EXPECT_GE(rc, 0); 146 | } 147 | for (i = 10000L; i <= 30000L; i++) { 148 | rc = hll_cnt_offer(tbm1, &i, sizeof(uint64_t)); 149 | EXPECT_GE(rc, 0); 150 | } 151 | rc = hll_cnt_get_raw_bytes(tbm1, buf1, &len1); 152 | EXPECT_EQ(rc, 0); 153 | for (i = 20000L; i <= 40000L; i++) { 154 | rc = hll_cnt_offer(tbm2, &i, sizeof(uint64_t)); 155 | EXPECT_GE(rc, 0); 156 | } 157 | rc = hll_cnt_get_raw_bytes(tbm2, buf2, &len2); 158 | EXPECT_EQ(rc, 0); 159 | 160 | rc = hll_cnt_merge_raw_bytes(ctx, buf1, len1, buf2, len2, NULL); 161 | EXPECT_EQ(rc, 0); 162 | esti = hll_cnt_card(ctx); 163 | EXPECT_GT(esti, 0); 164 | printf("actual:40000, estimated: %9lu, error: %+7.2f%%\n", 165 | (long unsigned int)esti, (double)(esti - 40000) / 40000 * 100); 166 | 167 | rc = hll_cnt_fini(tbm2); 168 | EXPECT_EQ(rc, 0); 169 | rc = hll_cnt_fini(tbm1); 170 | EXPECT_EQ(rc, 0); 171 | rc = hll_cnt_fini(ctx); 172 | EXPECT_EQ(rc, 0); 173 | } 174 | 175 | /** 176 | * Tests serialize, unserialize and merge. 177 | * 178 | *
    179 | *
  1. Current context contains 1 to 20000
  2. 180 | *
  3. Tbm1 that contains 10000 to 30000 be serialized as buf1
  4. 181 | *
  5. Tbm2 that contains 20000 to 40000 be serialized as buf2
  6. 182 | *
  7. Merges buf1 and buf2 into current context
  8. 183 | *
184 | * */ 185 | TEST(HyperloglogCounting, Merge) 186 | { 187 | int rc; 188 | int64_t i, esti; 189 | hll_cnt_ctx_t *ctx = hll_cnt_init(NULL, 16, CCARD_HASH_MURMUR); 190 | EXPECT_NE(ctx, (hll_cnt_ctx_t *)NULL); 191 | hll_cnt_ctx_t *tbm1 = hll_cnt_init(NULL, 16, CCARD_HASH_MURMUR); 192 | EXPECT_NE(tbm1, (hll_cnt_ctx_t *)NULL); 193 | hll_cnt_ctx_t *tbm2 = hll_cnt_init(NULL, 16, CCARD_HASH_MURMUR); 194 | EXPECT_NE(tbm2, (hll_cnt_ctx_t *)NULL); 195 | int32_t m = 1 << 16; 196 | uint8_t buf1[m + 3], buf2[m + 3]; 197 | uint32_t len1 = m + 3, len2 = m + 3; 198 | 199 | for (i = 1; i <= 20000L; i++) { 200 | rc = hll_cnt_offer(ctx, &i, sizeof(uint64_t)); 201 | EXPECT_GE(rc, 0); 202 | } 203 | for (i = 10000L; i <= 30000L; i++) { 204 | rc = hll_cnt_offer(tbm1, &i, sizeof(uint64_t)); 205 | EXPECT_GE(rc, 0); 206 | } 207 | rc = hll_cnt_get_bytes(tbm1, buf1, &len1); 208 | EXPECT_EQ(rc, 0); 209 | for (i = 20000L; i <= 40000L; i++) { 210 | rc = hll_cnt_offer(tbm2, &i, sizeof(uint64_t)); 211 | EXPECT_GE(rc, 0); 212 | } 213 | rc = hll_cnt_get_bytes(tbm2, buf2, &len2); 214 | EXPECT_EQ(rc, 0); 215 | 216 | rc = hll_cnt_merge_bytes(ctx, buf1, len1, buf2, len2, NULL); 217 | EXPECT_EQ(rc, 0); 218 | esti = hll_cnt_card(ctx); 219 | EXPECT_GT(esti, 0); 220 | printf("actual:40000, estimated: %9lu, error: %+7.2f%%\n", 221 | (long unsigned int)esti, (double)(esti - 40000) / 40000 * 100); 222 | 223 | rc = hll_cnt_fini(tbm2); 224 | EXPECT_EQ(rc, 0); 225 | rc = hll_cnt_fini(tbm1); 226 | EXPECT_EQ(rc, 0); 227 | rc = hll_cnt_fini(ctx); 228 | EXPECT_EQ(rc, 0); 229 | } 230 | 231 | TEST(HyperloglogCounting, Deserialize) 232 | { 233 | hll_cnt_ctx_t *ctx = hll_cnt_init(NULL, 16, CCARD_HASH_MURMUR); 234 | EXPECT_NE(ctx, (hll_cnt_ctx_t *)NULL); 235 | 236 | for(int i = 1; i < 100; i++) { 237 | hll_cnt_offer(ctx, &i, sizeof(i)); 238 | } 239 | uint64_t esti = hll_cnt_card(ctx); 240 | 241 | uint32_t num_bytes = 0; 242 | EXPECT_EQ(hll_cnt_get_bytes(ctx, NULL, &num_bytes), 0); 243 | uint8_t buf[num_bytes]; 244 | 
EXPECT_EQ(hll_cnt_get_bytes(ctx, buf, &num_bytes), 0); 245 | hll_cnt_ctx_t *other = hll_cnt_init(buf, num_bytes, CCARD_HASH_MURMUR); 246 | EXPECT_NE(other, (hll_cnt_ctx_t *)NULL); 247 | EXPECT_EQ(hll_cnt_card(other), esti); 248 | } 249 | 250 | // vi:ft=c ts=4 sw=4 fdm=marker et 251 | -------------------------------------------------------------------------------- /t/hyperloglogplus_counting_unittest.cc: -------------------------------------------------------------------------------- 1 | #include "ccard_common.h" 2 | #include "hyperloglogplus_counting.h" 3 | #include "gtest/gtest.h" 4 | 5 | /** 6 | * Tests estimate. 7 | * 8 | *

9 | * From 1 to 500000 distinct elements print estimated value every 50000. 10 | * Use HyperloglogPlusCounting algorithm 11 | *

12 | * */ 13 | TEST(HyperloglogPlusCounting, RawCounting) 14 | { 15 | int rc; 16 | int64_t i, esti; 17 | hllp_cnt_ctx_t *ctx = hllp_cnt_raw_init(NULL, 16); 18 | EXPECT_NE(ctx, (hllp_cnt_ctx_t *)NULL); 19 | 20 | printf("HyperloglogPlus Counting:\n"); 21 | for (i = 1; i <= 500000L; i++) { 22 | rc = hllp_cnt_offer(ctx, &i, sizeof(int64_t)); 23 | EXPECT_GE(rc, 0); 24 | 25 | if (i % 50000 == 0) { 26 | esti = hllp_cnt_card(ctx); 27 | EXPECT_GT(esti, 0); 28 | printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n", 29 | (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100); 30 | } 31 | } 32 | printf("\n"); 33 | 34 | rc = hllp_cnt_reset(ctx); 35 | EXPECT_EQ(rc, 0); 36 | 37 | rc = hllp_cnt_fini(ctx); 38 | EXPECT_EQ(rc, 0); 39 | } 40 | 41 | /** 42 | * Tests estimate. 43 | * 44 | *

45 | * From 1 to 500000 distinct elements print estimated value every 50000. 46 | * Use HyperloglogPlusCounting algorithm 47 | *

48 | * */ 49 | TEST(HyperloglogPlusCounting, Counting) 50 | { 51 | int rc; 52 | int64_t i, esti; 53 | hllp_cnt_ctx_t *ctx = hllp_cnt_init(NULL, 16); 54 | EXPECT_NE(ctx, (hllp_cnt_ctx_t *)NULL); 55 | 56 | printf("HyperloglogPlus Counting:\n"); 57 | for (i = 1; i <= 500000L; i++) { 58 | rc = hllp_cnt_offer(ctx, &i, sizeof(int64_t)); 59 | EXPECT_GE(rc, 0); 60 | 61 | if (i % 50000 == 0) { 62 | esti = hllp_cnt_card(ctx); 63 | EXPECT_GT(esti, 0); 64 | printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n", 65 | (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100); 66 | } 67 | } 68 | printf("\n"); 69 | 70 | rc = hllp_cnt_reset(ctx); 71 | EXPECT_EQ(rc, 0); 72 | 73 | rc = hllp_cnt_fini(ctx); 74 | EXPECT_EQ(rc, 0); 75 | } 76 | 77 | TEST(HyperloglogPlusCounting, Deserialize) 78 | { 79 | hllp_cnt_ctx_t *ctx = hllp_cnt_init(NULL, 16); 80 | EXPECT_NE(ctx, (hllp_cnt_ctx_t *)NULL); 81 | 82 | for(int i = 1; i < 100; i++) { 83 | hllp_cnt_offer(ctx, &i, sizeof(i)); 84 | } 85 | uint64_t esti = hllp_cnt_card(ctx); 86 | 87 | uint32_t num_bytes = 0; 88 | EXPECT_EQ(hllp_cnt_get_bytes(ctx, NULL, &num_bytes), 0); 89 | uint8_t buf[num_bytes]; 90 | EXPECT_EQ(hllp_cnt_get_bytes(ctx, buf, &num_bytes), 0); 91 | hllp_cnt_ctx_t *other = hllp_cnt_init(buf, num_bytes); 92 | EXPECT_NE(other, (hllp_cnt_ctx_t *)NULL); 93 | EXPECT_EQ(hllp_cnt_card(other), esti); 94 | } 95 | 96 | TEST(HyperloglogPlusCounting, MergeBytes) 97 | { 98 | hllp_cnt_ctx_t *ctx = hllp_cnt_init(NULL, 10); 99 | hllp_cnt_ctx_t *other = hllp_cnt_init(NULL, 10); 100 | int64_t i = 1; 101 | hllp_cnt_offer(other, &i, sizeof(int64_t)); 102 | uint8_t buf[1027]; 103 | uint32_t len = 1027; 104 | int result = hllp_cnt_get_bytes(other, buf, &len); 105 | EXPECT_EQ(result, 0); 106 | result = hllp_cnt_merge_bytes(ctx, buf, 1027, NULL); 107 | EXPECT_EQ(result, 0); 108 | EXPECT_EQ(hllp_cnt_card(ctx), 1); 109 | } 110 | 111 | TEST(HyperloglogPlusCounting, MergeBytesVariadic) 112 | { 113 | hllp_cnt_ctx_t *ctx = 
hllp_cnt_init(NULL, 10); 114 | hllp_cnt_ctx_t *other1 = hllp_cnt_init(NULL, 10); 115 | hllp_cnt_ctx_t *other2 = hllp_cnt_init(NULL, 10); 116 | int64_t i = 1; 117 | hllp_cnt_offer(other1, &i, sizeof(int64_t)); 118 | i = 2; 119 | hllp_cnt_offer(other2, &i, sizeof(int64_t)); 120 | 121 | uint8_t buf1[1027]; 122 | uint8_t buf2[1027]; 123 | 124 | uint32_t len = 1027; 125 | int result = hllp_cnt_get_bytes(other1, buf1, &len); 126 | EXPECT_EQ(result, 0); 127 | result = hllp_cnt_get_bytes(other2, buf2, &len); 128 | EXPECT_EQ(result, 0); 129 | result = hllp_cnt_merge_bytes(ctx, buf1, 1027, buf2, 1027, NULL); 130 | EXPECT_EQ(result, 0); 131 | EXPECT_EQ(hllp_cnt_card(ctx), 2); 132 | } 133 | 134 | -------------------------------------------------------------------------------- /t/linear_counting_unittest.cc: -------------------------------------------------------------------------------- 1 | #include "ccard_common.h" 2 | #include "linear_counting.h" 3 | #include "gtest/gtest.h" 4 | 5 | /** 6 | * Tests estimate. 7 | * 8 | *

9 | * From 1 to 500000 distinct elements print estimated value every 50000. 10 | * Use LinearCounting algorithm 11 | *

12 | * */ 13 | TEST(LinearCounting, RawCounting) 14 | { 15 | int rc; 16 | int64_t i, esti; 17 | lnr_cnt_ctx_t *ctx1 = lnr_cnt_raw_init(NULL, 16, CCARD_HASH_MURMUR); 18 | EXPECT_NE(ctx1, (lnr_cnt_ctx_t *)NULL); 19 | lnr_cnt_ctx_t *ctx2 = lnr_cnt_raw_init(NULL, 16, CCARD_HASH_LOOKUP3); 20 | EXPECT_NE(ctx2, (lnr_cnt_ctx_t *)NULL); 21 | 22 | printf("Linear Counting with Murmurhash:\n"); 23 | for (i = 1; i <= 500000L; i++) { 24 | rc = lnr_cnt_offer(ctx1, &i, sizeof(int64_t)); 25 | EXPECT_GE(rc, 0); 26 | 27 | if (i % 50000 == 0) { 28 | esti = lnr_cnt_card(ctx1); 29 | EXPECT_GT(esti, 0); 30 | printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n", 31 | (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100); 32 | } 33 | } 34 | printf("\n"); 35 | 36 | rc = lnr_cnt_reset(ctx1); 37 | EXPECT_EQ(rc, 0); 38 | 39 | printf("Linear Counting with Lookup3hash:\n"); 40 | for (i = 1; i <= 500000L; i++) { 41 | rc = lnr_cnt_offer(ctx2, &i, sizeof(int64_t)); 42 | EXPECT_GE(rc, 0); 43 | 44 | if (i % 50000 == 0) { 45 | esti = lnr_cnt_card(ctx2); 46 | EXPECT_GT(esti, 0); 47 | printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n", 48 | (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100); 49 | } 50 | } 51 | printf("\n"); 52 | 53 | rc = lnr_cnt_reset(ctx2); 54 | EXPECT_EQ(rc, 0); 55 | 56 | rc = lnr_cnt_fini(ctx2); 57 | EXPECT_EQ(rc, 0); 58 | rc = lnr_cnt_fini(ctx1); 59 | EXPECT_EQ(rc, 0); 60 | } 61 | 62 | /** 63 | * Tests estimate. 64 | * 65 | *

66 | * From 1 to 500000 distinct elements print estimated value every 50000. 67 | * Use LinearCounting algorithm 68 | *

69 | * */ 70 | TEST(LinearCounting, Counting) 71 | { 72 | int rc; 73 | int64_t i, esti; 74 | lnr_cnt_ctx_t *ctx1 = lnr_cnt_init(NULL, 16, CCARD_HASH_MURMUR); 75 | EXPECT_NE(ctx1, (lnr_cnt_ctx_t *)NULL); 76 | lnr_cnt_ctx_t *ctx2 = lnr_cnt_init(NULL, 16, CCARD_HASH_LOOKUP3); 77 | EXPECT_NE(ctx2, (lnr_cnt_ctx_t *)NULL); 78 | 79 | printf("Linear Counting with Murmurhash:\n"); 80 | for (i = 1; i <= 500000L; i++) { 81 | rc = lnr_cnt_offer(ctx1, &i, sizeof(int64_t)); 82 | EXPECT_GE(rc, 0); 83 | 84 | if (i % 50000 == 0) { 85 | esti = lnr_cnt_card(ctx1); 86 | EXPECT_GT(esti, 0); 87 | printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n", 88 | (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100); 89 | } 90 | } 91 | printf("\n"); 92 | 93 | rc = lnr_cnt_reset(ctx1); 94 | EXPECT_EQ(rc, 0); 95 | 96 | printf("Linear Counting with Lookup3hash:\n"); 97 | for (i = 1; i <= 500000L; i++) { 98 | rc = lnr_cnt_offer(ctx2, &i, sizeof(int64_t)); 99 | EXPECT_GE(rc, 0); 100 | 101 | if (i % 50000 == 0) { 102 | esti = lnr_cnt_card(ctx2); 103 | EXPECT_GT(esti, 0); 104 | printf("actual: %9lu, estimated: %9lu, error: %+7.2f%%\n", 105 | (long unsigned int)i, (long unsigned int)esti, (double)(esti - i) / i * 100); 106 | } 107 | } 108 | printf("\n"); 109 | 110 | rc = lnr_cnt_reset(ctx2); 111 | EXPECT_EQ(rc, 0); 112 | 113 | rc = lnr_cnt_fini(ctx2); 114 | EXPECT_EQ(rc, 0); 115 | rc = lnr_cnt_fini(ctx1); 116 | EXPECT_EQ(rc, 0); 117 | } 118 | 119 | /** 120 | * Tests serialize, unserialize and merge. 121 | * 122 | *
    123 | *
  1. Current context contains 1 to 20000
  2. 124 | *
  3. Tbm1 that contains 10000 to 30000 be serialized as buf1
  4. 125 | *
  5. Tbm2 that contains 20000 to 40000 be serialized as buf2
  6. 126 | *
  7. Merges buf1 and buf2 into current context
  8. 127 | *
128 | * */ 129 | TEST(LinearCounting, RawMerge) 130 | { 131 | int rc; 132 | int64_t i, esti; 133 | lnr_cnt_ctx_t *ctx = lnr_cnt_raw_init(NULL, 16, CCARD_HASH_MURMUR); 134 | EXPECT_NE(ctx, (lnr_cnt_ctx_t *)NULL); 135 | lnr_cnt_ctx_t *tbm1 = lnr_cnt_raw_init(NULL, 16, CCARD_HASH_MURMUR); 136 | EXPECT_NE(tbm1, (lnr_cnt_ctx_t *)NULL); 137 | lnr_cnt_ctx_t *tbm2 = lnr_cnt_raw_init(NULL, 16, CCARD_HASH_MURMUR); 138 | EXPECT_NE(tbm2, (lnr_cnt_ctx_t *)NULL); 139 | int32_t m = 1 << 16; 140 | uint8_t buf1[m + 3], buf2[m + 3]; 141 | uint32_t len1 = m + 3, len2 = m + 3; 142 | 143 | for (i = 1; i <= 20000L; i++) { 144 | rc = lnr_cnt_offer(ctx, &i, sizeof(uint64_t)); 145 | EXPECT_GE(rc, 0); 146 | } 147 | for (i = 10000L; i <= 30000L; i++) { 148 | rc = lnr_cnt_offer(tbm1, &i, sizeof(uint64_t)); 149 | EXPECT_GE(rc, 0); 150 | } 151 | rc = lnr_cnt_get_raw_bytes(tbm1, buf1, &len1); 152 | for (i = 20000L; i <= 40000L; i++) { 153 | rc = lnr_cnt_offer(tbm2, &i, sizeof(uint64_t)); 154 | EXPECT_GE(rc, 0); 155 | } 156 | rc = lnr_cnt_get_raw_bytes(tbm2, buf2, &len2); 157 | EXPECT_EQ(rc, 0); 158 | 159 | rc = lnr_cnt_merge_raw_bytes(ctx, buf1, len1, buf2, len2, NULL); 160 | EXPECT_EQ(rc, 0); 161 | esti = lnr_cnt_card(ctx); 162 | EXPECT_GT(esti, 0); 163 | printf("actual:40000, estimated: %9lu, error: %+7.2f%%\n", 164 | (long unsigned int)esti, (double)(esti - 40000) / 40000 * 100); 165 | 166 | rc = lnr_cnt_fini(tbm2); 167 | EXPECT_EQ(rc, 0); 168 | rc = lnr_cnt_fini(tbm1); 169 | EXPECT_EQ(rc, 0); 170 | rc = lnr_cnt_fini(ctx); 171 | EXPECT_EQ(rc, 0); 172 | } 173 | 174 | /** 175 | * Tests serialize, unserialize and merge. 176 | * 177 | *
    178 | *
  1. Current context contains 1 to 20000
  2. 179 | *
  3. Tbm1 that contains 10000 to 30000 be serialized as buf1
  4. 180 | *
  5. Tbm2 that contains 20000 to 40000 be serialized as buf2
  6. 181 | *
  7. Merges buf1 and buf2 into current context
  8. 182 | *
183 | * */ 184 | TEST(LinearCounting, Merge) 185 | { 186 | int rc; 187 | int64_t i, esti; 188 | lnr_cnt_ctx_t *ctx = lnr_cnt_init(NULL, 16, CCARD_HASH_MURMUR); 189 | EXPECT_NE(ctx, (lnr_cnt_ctx_t *)NULL); 190 | lnr_cnt_ctx_t *tbm1 = lnr_cnt_init(NULL, 16, CCARD_HASH_MURMUR); 191 | EXPECT_NE(tbm1, (lnr_cnt_ctx_t *)NULL); 192 | lnr_cnt_ctx_t *tbm2 = lnr_cnt_init(NULL, 16, CCARD_HASH_MURMUR); 193 | EXPECT_NE(tbm2, (lnr_cnt_ctx_t *)NULL); 194 | int32_t m = 1 << 16; 195 | uint8_t buf1[m + 3], buf2[m + 3]; 196 | uint32_t len1 = m + 3, len2 = m + 3; 197 | 198 | for (i = 1; i <= 20000L; i++) { 199 | rc = lnr_cnt_offer(ctx, &i, sizeof(uint64_t)); 200 | EXPECT_GE(rc, 0); 201 | } 202 | for (i = 10000L; i <= 30000L; i++) { 203 | rc = lnr_cnt_offer(tbm1, &i, sizeof(uint64_t)); 204 | EXPECT_GE(rc, 0); 205 | } 206 | rc = lnr_cnt_get_bytes(tbm1, buf1, &len1); 207 | EXPECT_EQ(rc, 0); 208 | for (i = 20000L; i <= 40000L; i++) { 209 | rc = lnr_cnt_offer(tbm2, &i, sizeof(uint64_t)); 210 | EXPECT_GE(rc, 0); 211 | } 212 | rc = lnr_cnt_get_bytes(tbm2, buf2, &len2); 213 | EXPECT_EQ(rc, 0); 214 | 215 | rc = lnr_cnt_merge_bytes(ctx, buf1, len1, buf2, len2, NULL); 216 | EXPECT_EQ(rc, 0); 217 | esti = lnr_cnt_card(ctx); 218 | EXPECT_GT(esti, 0); 219 | printf("actual:40000, estimated: %9lu, error: %+7.2f%%\n", 220 | (long unsigned int)esti, (double)(esti - 40000) / 40000 * 100); 221 | 222 | rc = lnr_cnt_fini(tbm2); 223 | EXPECT_EQ(rc, 0); 224 | rc = lnr_cnt_fini(tbm1); 225 | EXPECT_EQ(rc, 0); 226 | rc = lnr_cnt_fini(ctx); 227 | EXPECT_EQ(rc, 0); 228 | } 229 | 230 | /** 231 | * Serialize & deserialize bitmap 232 | * */ 233 | TEST(LinearCounting, Deserialize) 234 | { 235 | lnr_cnt_ctx_t *ctx = lnr_cnt_init(NULL, 16, CCARD_HASH_MURMUR); 236 | EXPECT_NE(ctx, (lnr_cnt_ctx_t *)NULL); 237 | 238 | for(int i = 1; i < 100; i++) { 239 | lnr_cnt_offer(ctx, &i, sizeof(i)); 240 | } 241 | int64_t esti = lnr_cnt_card(ctx); 242 | 243 | uint32_t num_bytes = 0; 244 | EXPECT_EQ(lnr_cnt_get_bytes(ctx, NULL, 
&num_bytes), 0); 245 | 246 | uint8_t buf[num_bytes]; 247 | EXPECT_EQ(lnr_cnt_get_bytes(ctx, buf, &num_bytes), 0); 248 | 249 | lnr_cnt_ctx_t *other = lnr_cnt_init(buf, num_bytes, CCARD_HASH_MURMUR); 250 | EXPECT_NE(other, (lnr_cnt_ctx_t *)NULL); 251 | EXPECT_EQ(lnr_cnt_card(other), esti); 252 | } 253 | 254 | // vi:ft=c ts=4 sw=4 fdm=marker et 255 | 256 | -------------------------------------------------------------------------------- /t/lookup3hash_unittest.cc: -------------------------------------------------------------------------------- 1 | #include "lookup3hash.h" 2 | #include "gtest/gtest.h" 3 | 4 | /** 5 | * Tests Lookup3hash with byte[] buffer, the hash code is 64bit. 6 | * 7 | *

8 | * When using java stream-lib the hash code of "hello world" is 4141157809988715033 9 | *

10 | * */ 11 | TEST(Lookup3hashTest, StringHashToLong) 12 | { 13 | const char *s = "hello world"; 14 | 15 | EXPECT_EQ(4141157809988715033lu, lookup3ycs64_2(s)); 16 | } 17 | 18 | // vi:ft=c ts=4 sw=4 fdm=marker et 19 | 20 | -------------------------------------------------------------------------------- /t/murmurhash_unittest.cc: -------------------------------------------------------------------------------- 1 | #include "murmurhash.h" 2 | #include "gtest/gtest.h" 3 | 4 | /** 5 | * Tests Murmurhash with byte[] buffer. 6 | * 7 | *

8 | * When using java stream-lib the hash code of "hello world" is 1964480955 9 | *

10 | * */ 11 | TEST(MurmurhashTest, Buffer) 12 | { 13 | char *s = (char *)"hello world"; 14 | 15 | EXPECT_EQ(1964480955lu, murmurhash((void *)s, strlen(s), -1)); 16 | } 17 | 18 | /** 19 | * Tests Murmurhash with 64bit long integer. 20 | * 21 | *

22 | * When using java stream-lib the hash code of 123456 is 148129653 23 | *

24 | * */ 25 | TEST(MurmurhashTest, Long) 26 | { 27 | uint64_t longint = 123456; 28 | 29 | EXPECT_EQ(148129653lu, murmurhash_long(longint)); 30 | } 31 | 32 | /** 33 | * Tests Murmurhash64 with byte[] buffer. 34 | * 35 | *

36 | * When using java stream-lib the hash code (with sign) of "hello world" is -779442749388864765 37 | *

38 | * */ 39 | TEST(Murmurhash64Test, Buffer) 40 | { 41 | char *s = (char *)"hello world"; 42 | 43 | EXPECT_EQ(-779442749388864765l, (int64_t)murmurhash64_no_seed((void *)s, strlen(s))); 44 | } 45 | 46 | // vi:ft=c ts=4 sw=4 fdm=marker et 47 | -------------------------------------------------------------------------------- /t/register_set_unittest.cc: -------------------------------------------------------------------------------- 1 | #include "register_set.h" 2 | #include "gtest/gtest.h" 3 | 4 | /** 5 | * Tests set and get value of register set. 6 | * 7 | *

8 | *

9 | * */ 10 | TEST(RegisterSetTest, GetAndSet) 11 | { 12 | reg_set_t *rs = rs_init(1024, NULL, 0); 13 | uint32_t value; 14 | int i; 15 | 16 | for (i = 0; i < 1023; i++) { 17 | rs_set(rs, i, i % 32); 18 | } 19 | 20 | for (i = 0; i < 1023; i++) { 21 | if (i % 100 == 0) { 22 | rs_get(rs, i, &value); 23 | EXPECT_EQ((uint32_t)(i % 32), value); 24 | } 25 | } 26 | 27 | rs_fini(rs); 28 | } 29 | 30 | // vi:ft=c ts=4 sw=4 fdm=marker et 31 | 32 | -------------------------------------------------------------------------------- /util/indent-src: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | INDENT="astyle -A3 -s4 -S -c -k3 -O -o -Y -m0 -n -p" 3 | find . -name '*.[c,h]' -exec $INDENT '{}' ';' 4 | find . -name '*.cc' -exec $INDENT '{}' ';' 5 | 6 | --------------------------------------------------------------------------------