├── README.md ├── bitmap.c ├── ewah_bitmap.c ├── ewah_io.c ├── ewah_rlw.c ├── ewok.h ├── ewok_rlw.h └── test └── test_logical.c /README.md: -------------------------------------------------------------------------------- 1 | libewok 2 | ======= 3 | 4 | EWAH compressed bitmaps in C. 5 | 6 | Usage 7 | ----- 8 | 9 | ```c 10 | struct ewah_bitmap *array = ewah_new(); 11 | struct ewah_iterator it; 12 | eword_t word; 13 | 14 | ewah_set(array, 3); 15 | ewah_set(array, 32); 16 | ewah_set(array, 48); 17 | ewah_set(array, 63); 18 | ewah_set(array, 1024); 19 | ewah_set(array, 7600); 20 | 21 | ewah_iterator_init(&it, array); 22 | 23 | while (ewah_iterator_next(&word, &it)) 24 | printf("%08llX ", (unsigned long long)word); 25 | 26 | ewah_each_bit(array, &print_a_bit, NULL); 27 | 28 | ewah_free(array); 29 | ``` 30 | 31 | Related docs: 32 | ------------ 33 | 34 | - https://github.com/lemire/EWAHBoolArray 35 | - https://github.com/lemire/javaewah 36 | - http://arxiv.org/abs/0901.3751 37 | - http://www.slideshare.net/lemire/all-about-bitmap-indexes-and-sorting-them 38 | 39 | License 40 | ------- 41 | 42 | This port is based on the original EWAHBoolArray implementation by Daniel Lemire and is licensed under the terms of the GPLv2, with the original author's permission. 43 | 44 | The original contributors are as follows: 45 | 46 | - Daniel Lemire (http://lemire.me/en/), 47 | - Cliff Moon (https://github.com/cliffmoon), 48 | - David McIntosh (https://github.com/mctofu), 49 | - Robert Becho (https://github.com/RBecho), 50 | - Colby Ranger (https://github.com/crangeratgoogle), 51 | - Veronika Zenz (https://github.com/veronikazenz) 52 | -------------------------------------------------------------------------------- /bitmap.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013, GitHub, Inc 3 | * Copyright 2009-2013, Daniel Lemire, Cliff Moon, 4 | * David McIntosh, Robert Becho, Google Inc.
and Veronika Zenz 5 | * 6 | * This program is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU General Public License 8 | * as published by the Free Software Foundation; either version 2 9 | * of the License, or (at your option) any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, write to the Free Software 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 19 | */ 20 | #include 21 | #include 22 | #include 23 | 24 | #include "ewok.h" 25 | 26 | #define MASK(x) ((eword_t)1 << (x % BITS_IN_WORD)) 27 | #define BLOCK(x) (x / BITS_IN_WORD) 28 | 29 | struct bitmap *bitmap_new(void) 30 | { 31 | struct bitmap *bitmap = ewah_malloc(sizeof(struct bitmap)); 32 | bitmap->words = ewah_calloc(32, sizeof(eword_t)); 33 | bitmap->word_alloc = 32; 34 | return bitmap; 35 | } 36 | 37 | void bitmap_set(struct bitmap *self, size_t pos) 38 | { 39 | size_t block = BLOCK(pos); 40 | 41 | if (block >= self->word_alloc) { 42 | size_t old_size = self->word_alloc; 43 | self->word_alloc = block * 2; 44 | self->words = ewah_realloc(self->words, self->word_alloc * sizeof(eword_t)); 45 | 46 | memset(self->words + old_size, 0x0, 47 | (self->word_alloc - old_size) * sizeof(eword_t)); 48 | } 49 | 50 | self->words[block] |= MASK(pos); 51 | } 52 | 53 | void bitmap_clear(struct bitmap *self, size_t pos) 54 | { 55 | size_t block = BLOCK(pos); 56 | 57 | if (block < self->word_alloc) 58 | self->words[block] &= ~MASK(pos); 59 | } 60 | 61 | bool bitmap_get(struct bitmap *self, size_t pos) 62 | { 63 | size_t block = BLOCK(pos); 64 | return block < self->word_alloc && (self->words[block] & 
MASK(pos)) != 0; 65 | } 66 | 67 | struct ewah_bitmap *bitmap_compress(struct bitmap *bitmap) 68 | { 69 | struct ewah_bitmap *ewah = ewah_new(); 70 | size_t i, running_empty_words = 0; 71 | eword_t last_word = 0; 72 | 73 | for (i = 0; i < bitmap->word_alloc; ++i) { 74 | if (bitmap->words[i] == 0) { 75 | running_empty_words++; 76 | continue; 77 | } 78 | 79 | if (last_word != 0) { 80 | ewah_add(ewah, last_word); 81 | } 82 | 83 | if (running_empty_words > 0) { 84 | ewah_add_empty_words(ewah, false, running_empty_words); 85 | running_empty_words = 0; 86 | } 87 | 88 | last_word = bitmap->words[i]; 89 | } 90 | 91 | ewah_add(ewah, last_word); 92 | return ewah; 93 | } 94 | 95 | struct bitmap *ewah_to_bitmap(struct ewah_bitmap *ewah) 96 | { 97 | struct bitmap *bitmap = bitmap_new(); 98 | struct ewah_iterator it; 99 | eword_t blowup; 100 | size_t i = 0; 101 | 102 | ewah_iterator_init(&it, ewah); 103 | 104 | while (ewah_iterator_next(&blowup, &it)) { 105 | if (i >= bitmap->word_alloc) { 106 | bitmap->word_alloc *= 1.5; 107 | bitmap->words = ewah_realloc( 108 | bitmap->words, bitmap->word_alloc * sizeof(eword_t)); 109 | } 110 | 111 | bitmap->words[i++] = blowup; 112 | } 113 | 114 | bitmap->word_alloc = i; 115 | return bitmap; 116 | } 117 | 118 | void bitmap_free(struct bitmap *bitmap) 119 | { 120 | free(bitmap->words); 121 | free(bitmap); 122 | } 123 | -------------------------------------------------------------------------------- /ewah_bitmap.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013, GitHub, Inc 3 | * Copyright 2009-2013, Daniel Lemire, Cliff Moon, 4 | * David McIntosh, Robert Becho, Google Inc. and Veronika Zenz 5 | * 6 | * This program is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU General Public License 8 | * as published by the Free Software Foundation; either version 2 9 | * of the License, or (at your option) any later version. 
10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, write to the Free Software 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 19 | */ 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "ewok.h" 27 | #include "ewok_rlw.h" 28 | 29 | static inline size_t min_size(size_t a, size_t b) 30 | { 31 | return a < b ? a : b; 32 | } 33 | 34 | static inline size_t max_size(size_t a, size_t b) 35 | { 36 | return a > b ? a : b; 37 | } 38 | 39 | static inline void buffer_grow(struct ewah_bitmap *self, size_t new_size) 40 | { 41 | size_t rlw_offset = (uint8_t *)self->rlw - (uint8_t *)self->buffer; 42 | 43 | if (self->alloc_size >= new_size) 44 | return; 45 | 46 | self->alloc_size = new_size; 47 | self->buffer = ewah_realloc(self->buffer, self->alloc_size * sizeof(eword_t)); 48 | self->rlw = self->buffer + (rlw_offset / sizeof(size_t)); 49 | } 50 | 51 | static inline void buffer_push(struct ewah_bitmap *self, eword_t value) 52 | { 53 | if (self->buffer_size + 1 >= self->alloc_size) { 54 | buffer_grow(self, self->buffer_size * 1.5); 55 | } 56 | 57 | self->buffer[self->buffer_size++] = value; 58 | } 59 | 60 | static void buffer_push_rlw(struct ewah_bitmap *self, eword_t value) 61 | { 62 | buffer_push(self, value); 63 | self->rlw = self->buffer + self->buffer_size - 1; 64 | } 65 | 66 | static size_t add_empty_words(struct ewah_bitmap *self, bool v, size_t number) 67 | { 68 | size_t added = 0; 69 | 70 | if (rlw_get_run_bit(self->rlw) != v && rlw_size(self->rlw) == 0) { 71 | rlw_set_run_bit(self->rlw, v); 72 | } 73 | else if (rlw_get_literal_words(self->rlw) != 0 || 
rlw_get_run_bit(self->rlw) != v) { 74 | buffer_push_rlw(self, 0); 75 | if (v) rlw_set_run_bit(self->rlw, v); 76 | added++; 77 | } 78 | 79 | eword_t runlen = rlw_get_running_len(self->rlw); 80 | eword_t can_add = min_size(number, RLW_LARGEST_RUNNING_COUNT - runlen); 81 | 82 | rlw_set_running_len(self->rlw, runlen + can_add); 83 | number -= can_add; 84 | 85 | while (number >= RLW_LARGEST_RUNNING_COUNT) { 86 | buffer_push_rlw(self, 0); 87 | added++; 88 | 89 | if (v) rlw_set_run_bit(self->rlw, v); 90 | rlw_set_running_len(self->rlw, RLW_LARGEST_RUNNING_COUNT); 91 | 92 | number -= RLW_LARGEST_RUNNING_COUNT; 93 | } 94 | 95 | if (number > 0) { 96 | buffer_push_rlw(self, 0); 97 | added++; 98 | 99 | if (v) rlw_set_run_bit(self->rlw, v); 100 | rlw_set_running_len(self->rlw, number); 101 | } 102 | 103 | return added; 104 | } 105 | 106 | size_t ewah_add_empty_words(struct ewah_bitmap *self, bool v, size_t number) 107 | { 108 | if (number == 0) 109 | return 0; 110 | 111 | self->bit_size += number * BITS_IN_WORD; 112 | return add_empty_words(self, v, number); 113 | } 114 | 115 | static size_t add_literal(struct ewah_bitmap *self, eword_t new_data) 116 | { 117 | eword_t current_num = rlw_get_literal_words(self->rlw); 118 | 119 | if (current_num >= RLW_LARGEST_LITERAL_COUNT) { 120 | buffer_push_rlw(self, 0); 121 | 122 | rlw_set_literal_words(self->rlw, 1); 123 | buffer_push(self, new_data); 124 | return 2; 125 | } 126 | 127 | rlw_set_literal_words(self->rlw, current_num + 1); 128 | 129 | /* sanity check */ 130 | assert(rlw_get_literal_words(self->rlw) == current_num + 1); 131 | 132 | buffer_push(self, new_data); 133 | return 1; 134 | } 135 | 136 | void ewah_add_dirty_words( 137 | struct ewah_bitmap *self, const eword_t *buffer, size_t number, bool negate) 138 | { 139 | size_t literals, can_add; 140 | 141 | while (1) { 142 | literals = rlw_get_literal_words(self->rlw); 143 | can_add = min_size(number, RLW_LARGEST_LITERAL_COUNT - literals); 144 | 145 | 
rlw_set_literal_words(self->rlw, literals + can_add); 146 | 147 | if (self->buffer_size + can_add >= self->alloc_size) { 148 | buffer_grow(self, (self->buffer_size + can_add) * 1.5); 149 | } 150 | 151 | if (negate) { 152 | size_t i; 153 | for (i = 0; i < can_add; ++i) 154 | self->buffer[self->buffer_size++] = ~buffer[i]; 155 | } else { 156 | memcpy(self->buffer + self->buffer_size, buffer, can_add * sizeof(eword_t)); 157 | self->buffer_size += can_add; 158 | } 159 | 160 | self->bit_size += can_add * BITS_IN_WORD; 161 | 162 | if (number - can_add == 0) 163 | break; 164 | 165 | buffer_push_rlw(self, 0); 166 | buffer += can_add; 167 | number -= can_add; 168 | } 169 | } 170 | 171 | static size_t add_empty_word(struct ewah_bitmap *self, bool v) 172 | { 173 | bool no_literal = (rlw_get_literal_words(self->rlw) == 0); 174 | eword_t run_len = rlw_get_running_len(self->rlw); 175 | 176 | if (no_literal && run_len == 0) { 177 | rlw_set_run_bit(self->rlw, v); 178 | assert(rlw_get_run_bit(self->rlw) == v); 179 | } 180 | 181 | if (no_literal && rlw_get_run_bit(self->rlw) == v && 182 | run_len < RLW_LARGEST_RUNNING_COUNT) { 183 | rlw_set_running_len(self->rlw, run_len + 1); 184 | assert(rlw_get_running_len(self->rlw) == run_len + 1); 185 | return 0; 186 | } 187 | 188 | else { 189 | buffer_push_rlw(self, 0); 190 | 191 | assert(rlw_get_running_len(self->rlw) == 0); 192 | assert(rlw_get_run_bit(self->rlw) == 0); 193 | assert(rlw_get_literal_words(self->rlw) == 0); 194 | 195 | rlw_set_run_bit(self->rlw, v); 196 | assert(rlw_get_run_bit(self->rlw) == v); 197 | 198 | rlw_set_running_len(self->rlw, 1); 199 | assert(rlw_get_running_len(self->rlw) == 1); 200 | assert(rlw_get_literal_words(self->rlw) == 0); 201 | return 1; 202 | } 203 | } 204 | 205 | size_t ewah_add(struct ewah_bitmap *self, eword_t word) 206 | { 207 | self->bit_size += BITS_IN_WORD; 208 | 209 | if (word == 0) 210 | return add_empty_word(self, false); 211 | 212 | if (word == (eword_t)(~0)) 213 | return add_empty_word(self, 
true); 214 | 215 | return add_literal(self, word); 216 | } 217 | 218 | void ewah_set(struct ewah_bitmap *self, size_t i) 219 | { 220 | const size_t dist = 221 | (i + BITS_IN_WORD) / BITS_IN_WORD - 222 | (self->bit_size + BITS_IN_WORD - 1) / BITS_IN_WORD; 223 | 224 | assert(i >= self->bit_size); 225 | 226 | self->bit_size = i + 1; 227 | 228 | if (dist > 0) { 229 | if (dist > 1) 230 | add_empty_words(self, false, dist - 1); 231 | 232 | add_literal(self, (eword_t)1 << (i % BITS_IN_WORD)); 233 | return; 234 | } 235 | 236 | if (rlw_get_literal_words(self->rlw) == 0) { 237 | rlw_set_running_len(self->rlw, rlw_get_running_len(self->rlw) - 1); 238 | add_literal(self, (eword_t)1 << (i % BITS_IN_WORD)); 239 | return; 240 | } 241 | 242 | self->buffer[self->buffer_size - 1] |= ((eword_t)1 << (i % BITS_IN_WORD)); 243 | 244 | /* check if we just completed a stream of 1s */ 245 | if (self->buffer[self->buffer_size - 1] == (eword_t)(~0)) { 246 | self->buffer[--self->buffer_size] = 0; 247 | rlw_set_literal_words(self->rlw, rlw_get_literal_words(self->rlw) - 1); 248 | add_empty_word(self, true); 249 | } 250 | } 251 | 252 | void ewah_each_bit(struct ewah_bitmap *self, void (*callback)(size_t, void*), void *payload) 253 | { 254 | size_t pos = 0; 255 | size_t pointer = 0; 256 | size_t k; 257 | 258 | while (pointer < self->buffer_size) { 259 | eword_t *word = &self->buffer[pointer]; 260 | 261 | if (rlw_get_run_bit(word)) { 262 | size_t len = rlw_get_running_len(word) * BITS_IN_WORD; 263 | for (k = 0; k < len; ++k, ++pos) { 264 | callback(pos, payload); 265 | } 266 | } else { 267 | pos += rlw_get_running_len(word) * BITS_IN_WORD; 268 | } 269 | 270 | ++pointer; 271 | 272 | for (k = 0; k < rlw_get_literal_words(word); ++k) { 273 | int c; 274 | 275 | /* todo: zero count optimization */ 276 | for (c = 0; c < BITS_IN_WORD; ++c, ++pos) { 277 | if ((self->buffer[pointer] & ((eword_t)1 << c)) != 0) { 278 | callback(pos, payload); 279 | } 280 | } 281 | 282 | ++pointer; 283 | } 284 | } 285 | } 286 
| 287 | struct ewah_bitmap *ewah_new(void) 288 | { 289 | struct ewah_bitmap *bitmap; 290 | 291 | bitmap = ewah_malloc(sizeof(struct ewah_bitmap)); 292 | if (bitmap == NULL) 293 | return NULL; 294 | 295 | bitmap->buffer = ewah_malloc(32 * sizeof(eword_t)); 296 | bitmap->alloc_size = 32; 297 | 298 | ewah_clear(bitmap); 299 | 300 | return bitmap; 301 | } 302 | 303 | void ewah_clear(struct ewah_bitmap *bitmap) 304 | { 305 | bitmap->buffer_size = 1; 306 | bitmap->buffer[0] = 0; 307 | bitmap->bit_size = 0; 308 | bitmap->rlw = bitmap->buffer; 309 | } 310 | 311 | void ewah_free(struct ewah_bitmap *bitmap) 312 | { 313 | free(bitmap->buffer); 314 | free(bitmap); 315 | } 316 | 317 | static void read_new_rlw(struct ewah_iterator *it) 318 | { 319 | const eword_t *word = NULL; 320 | 321 | it->literals = 0; 322 | it->compressed = 0; 323 | 324 | while (1) { 325 | word = &it->buffer[it->pointer]; 326 | 327 | it->rl = rlw_get_running_len(word); 328 | it->lw = rlw_get_literal_words(word); 329 | it->b = rlw_get_run_bit(word); 330 | 331 | if (it->rl || it->lw) 332 | return; 333 | 334 | if (it->pointer < it->buffer_size - 1) { 335 | it->pointer++; 336 | } else { 337 | it->pointer = it->buffer_size; 338 | return; 339 | } 340 | } 341 | } 342 | 343 | bool ewah_iterator_next(eword_t *next, struct ewah_iterator *it) 344 | { 345 | if (it->pointer >= it->buffer_size) 346 | return false; 347 | 348 | if (it->compressed < it->rl) { 349 | it->compressed++; 350 | *next = it->b ? 
(eword_t)(~0) : 0; 351 | } else { 352 | assert(it->literals < it->lw); 353 | 354 | it->literals++; 355 | it->pointer++; 356 | 357 | assert(it->pointer < it->buffer_size); 358 | 359 | *next = it->buffer[it->pointer]; 360 | } 361 | 362 | if (it->compressed == it->rl && it->literals == it->lw) { 363 | if (++it->pointer < it->buffer_size) 364 | read_new_rlw(it); 365 | } 366 | 367 | return true; 368 | } 369 | 370 | void ewah_iterator_init(struct ewah_iterator *it, struct ewah_bitmap *parent) 371 | { 372 | it->buffer = parent->buffer; 373 | it->buffer_size = parent->buffer_size; 374 | it->pointer = 0; 375 | 376 | it->lw = 0; 377 | it->rl = 0; 378 | it->compressed = 0; 379 | it->literals = 0; 380 | it->b = false; 381 | 382 | if (it->pointer < it->buffer_size) 383 | read_new_rlw(it); 384 | } 385 | 386 | void ewah_dump(struct ewah_bitmap *bitmap) 387 | { 388 | size_t i; 389 | printf("%zu bits | %zu words | ", bitmap->bit_size, bitmap->buffer_size); 390 | 391 | for (i = 0; i < bitmap->buffer_size; ++i) 392 | printf("%016llx ", (unsigned long long)bitmap->buffer[i]); 393 | 394 | printf("\n"); 395 | } 396 | 397 | void ewah_not(struct ewah_bitmap *self) 398 | { 399 | size_t pointer = 0; 400 | 401 | while (pointer < self->buffer_size) { 402 | eword_t *word = &self->buffer[pointer]; 403 | size_t literals, k; 404 | 405 | rlw_xor_run_bit(word); 406 | ++pointer; 407 | 408 | literals = rlw_get_literal_words(word); 409 | for (k = 0; k < literals; ++k) { 410 | self->buffer[pointer] = ~self->buffer[pointer]; 411 | ++pointer; 412 | } 413 | } 414 | } 415 | 416 | void ewah_xor( 417 | struct ewah_bitmap *bitmap_i, 418 | struct ewah_bitmap *bitmap_j, 419 | struct ewah_bitmap *out) 420 | { 421 | struct rlw_iterator rlw_i; 422 | struct rlw_iterator rlw_j; 423 | 424 | rlwit_init(&rlw_i, bitmap_i); 425 | rlwit_init(&rlw_j, bitmap_j); 426 | 427 | while (rlwit_word_size(&rlw_i) > 0 && rlwit_word_size(&rlw_j) > 0) { 428 | while (rlw_i.rlw.running_len > 0 || rlw_j.rlw.running_len > 0) { 429 | struct 
rlw_iterator *prey, *predator; 430 | size_t index; 431 | bool negate_words; 432 | 433 | if (rlw_i.rlw.running_len < rlw_j.rlw.running_len) { 434 | prey = &rlw_i; 435 | predator = &rlw_j; 436 | } else { 437 | prey = &rlw_j; 438 | predator = &rlw_i; 439 | } 440 | 441 | negate_words = !!predator->rlw.running_bit; 442 | index = rlwit_discharge(prey, out, predator->rlw.running_len, negate_words); 443 | 444 | ewah_add_empty_words(out, negate_words, predator->rlw.running_len - index); 445 | rlwit_discard_first_words(predator, predator->rlw.running_len); 446 | } 447 | 448 | size_t literals = min_size(rlw_i.rlw.literal_words, rlw_j.rlw.literal_words); 449 | 450 | if (literals) { 451 | size_t k; 452 | 453 | for (k = 0; k < literals; ++k) { 454 | ewah_add(out, 455 | rlw_i.buffer[rlw_i.literal_word_start + k] ^ 456 | rlw_j.buffer[rlw_j.literal_word_start + k] 457 | ); 458 | } 459 | 460 | rlwit_discard_first_words(&rlw_i, literals); 461 | rlwit_discard_first_words(&rlw_j, literals); 462 | } 463 | } 464 | 465 | if (rlwit_word_size(&rlw_i) > 0) { 466 | rlwit_discharge(&rlw_i, out, ~0, false); 467 | } else { 468 | rlwit_discharge(&rlw_j, out, ~0, false); 469 | } 470 | 471 | out->bit_size = max_size(bitmap_i->bit_size, bitmap_j->bit_size); 472 | } 473 | 474 | void ewah_and( 475 | struct ewah_bitmap *bitmap_i, 476 | struct ewah_bitmap *bitmap_j, 477 | struct ewah_bitmap *out) 478 | { 479 | struct rlw_iterator rlw_i; 480 | struct rlw_iterator rlw_j; 481 | 482 | rlwit_init(&rlw_i, bitmap_i); 483 | rlwit_init(&rlw_j, bitmap_j); 484 | 485 | while (rlwit_word_size(&rlw_i) > 0 && rlwit_word_size(&rlw_j) > 0) { 486 | while (rlw_i.rlw.running_len > 0 || rlw_j.rlw.running_len > 0) { 487 | struct rlw_iterator *prey, *predator; 488 | 489 | if (rlw_i.rlw.running_len < rlw_j.rlw.running_len) { 490 | prey = &rlw_i; 491 | predator = &rlw_j; 492 | } else { 493 | prey = &rlw_j; 494 | predator = &rlw_i; 495 | } 496 | 497 | if (predator->rlw.running_bit == 0) { 498 | ewah_add_empty_words(out, false, 
predator->rlw.running_len); 499 | rlwit_discard_first_words(prey, predator->rlw.running_len); 500 | rlwit_discard_first_words(predator, predator->rlw.running_len); 501 | } else { 502 | size_t index; 503 | index = rlwit_discharge(prey, out, predator->rlw.running_len, false); 504 | ewah_add_empty_words(out, false, predator->rlw.running_len - index); 505 | rlwit_discard_first_words(predator, predator->rlw.running_len); 506 | } 507 | } 508 | 509 | size_t literals = min_size(rlw_i.rlw.literal_words, rlw_j.rlw.literal_words); 510 | 511 | if (literals) { 512 | size_t k; 513 | 514 | for (k = 0; k < literals; ++k) { 515 | ewah_add(out, 516 | rlw_i.buffer[rlw_i.literal_word_start + k] & 517 | rlw_j.buffer[rlw_j.literal_word_start + k] 518 | ); 519 | } 520 | 521 | rlwit_discard_first_words(&rlw_i, literals); 522 | rlwit_discard_first_words(&rlw_j, literals); 523 | } 524 | } 525 | 526 | if (rlwit_word_size(&rlw_i) > 0) { 527 | rlwit_discharge_empty(&rlw_i, out); 528 | } else { 529 | rlwit_discharge_empty(&rlw_j, out); 530 | } 531 | 532 | out->bit_size = max_size(bitmap_i->bit_size, bitmap_j->bit_size); 533 | } 534 | 535 | void ewah_and_not( 536 | struct ewah_bitmap *bitmap_i, 537 | struct ewah_bitmap *bitmap_j, 538 | struct ewah_bitmap *out) 539 | { 540 | struct rlw_iterator rlw_i; 541 | struct rlw_iterator rlw_j; 542 | 543 | rlwit_init(&rlw_i, bitmap_i); 544 | rlwit_init(&rlw_j, bitmap_j); 545 | 546 | while (rlwit_word_size(&rlw_i) > 0 && rlwit_word_size(&rlw_j) > 0) { 547 | while (rlw_i.rlw.running_len > 0 || rlw_j.rlw.running_len > 0) { 548 | struct rlw_iterator *prey, *predator; 549 | 550 | if (rlw_i.rlw.running_len < rlw_j.rlw.running_len) { 551 | prey = &rlw_i; 552 | predator = &rlw_j; 553 | } else { 554 | prey = &rlw_j; 555 | predator = &rlw_i; 556 | } 557 | 558 | if ((predator->rlw.running_bit && prey == &rlw_i) || 559 | (!predator->rlw.running_bit && prey != &rlw_i)) { 560 | ewah_add_empty_words(out, false, predator->rlw.running_len); 561 | 
rlwit_discard_first_words(prey, predator->rlw.running_len); 562 | rlwit_discard_first_words(predator, predator->rlw.running_len); 563 | } else { 564 | size_t index; 565 | bool negate_words; 566 | 567 | negate_words = (&rlw_i != prey); 568 | index = rlwit_discharge(prey, out, predator->rlw.running_len, negate_words); 569 | ewah_add_empty_words(out, negate_words, predator->rlw.running_len - index); 570 | rlwit_discard_first_words(predator, predator->rlw.running_len); 571 | } 572 | } 573 | 574 | size_t literals = min_size(rlw_i.rlw.literal_words, rlw_j.rlw.literal_words); 575 | 576 | if (literals) { 577 | size_t k; 578 | 579 | for (k = 0; k < literals; ++k) { 580 | ewah_add(out, 581 | rlw_i.buffer[rlw_i.literal_word_start + k] & 582 | ~(rlw_j.buffer[rlw_j.literal_word_start + k]) 583 | ); 584 | } 585 | 586 | rlwit_discard_first_words(&rlw_i, literals); 587 | rlwit_discard_first_words(&rlw_j, literals); 588 | } 589 | } 590 | 591 | if (rlwit_word_size(&rlw_i) > 0) { 592 | rlwit_discharge(&rlw_i, out, ~0, false); 593 | } else { 594 | rlwit_discharge_empty(&rlw_j, out); 595 | } 596 | 597 | out->bit_size = max_size(bitmap_i->bit_size, bitmap_j->bit_size); 598 | } 599 | 600 | void ewah_or( 601 | struct ewah_bitmap *bitmap_i, 602 | struct ewah_bitmap *bitmap_j, 603 | struct ewah_bitmap *out) 604 | { 605 | struct rlw_iterator rlw_i; 606 | struct rlw_iterator rlw_j; 607 | 608 | rlwit_init(&rlw_i, bitmap_i); 609 | rlwit_init(&rlw_j, bitmap_j); 610 | 611 | while (rlwit_word_size(&rlw_i) > 0 && rlwit_word_size(&rlw_j) > 0) { 612 | while (rlw_i.rlw.running_len > 0 || rlw_j.rlw.running_len > 0) { 613 | struct rlw_iterator *prey, *predator; 614 | 615 | if (rlw_i.rlw.running_len < rlw_j.rlw.running_len) { 616 | prey = &rlw_i; 617 | predator = &rlw_j; 618 | } else { 619 | prey = &rlw_j; 620 | predator = &rlw_i; 621 | } 622 | 623 | 624 | if (predator->rlw.running_bit) { 625 | ewah_add_empty_words(out, false, predator->rlw.running_len); 626 | rlwit_discard_first_words(prey, 
predator->rlw.running_len); 627 | rlwit_discard_first_words(predator, predator->rlw.running_len); 628 | } else { 629 | size_t index; 630 | index = rlwit_discharge(prey, out, predator->rlw.running_len, false); 631 | ewah_add_empty_words(out, false, predator->rlw.running_len - index); 632 | rlwit_discard_first_words(predator, predator->rlw.running_len); 633 | } 634 | } 635 | 636 | size_t literals = min_size(rlw_i.rlw.literal_words, rlw_j.rlw.literal_words); 637 | 638 | if (literals) { 639 | size_t k; 640 | 641 | for (k = 0; k < literals; ++k) { 642 | ewah_add(out, 643 | rlw_i.buffer[rlw_i.literal_word_start + k] | 644 | rlw_j.buffer[rlw_j.literal_word_start + k] 645 | ); 646 | } 647 | 648 | rlwit_discard_first_words(&rlw_i, literals); 649 | rlwit_discard_first_words(&rlw_j, literals); 650 | } 651 | } 652 | 653 | if (rlwit_word_size(&rlw_i) > 0) { 654 | rlwit_discharge(&rlw_i, out, ~0, false); 655 | } else { 656 | rlwit_discharge(&rlw_j, out, ~0, false); 657 | } 658 | 659 | out->bit_size = max_size(bitmap_i->bit_size, bitmap_j->bit_size); 660 | } 661 | -------------------------------------------------------------------------------- /ewah_io.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013, GitHub, Inc 3 | * Copyright 2009-2013, Daniel Lemire, Cliff Moon, 4 | * David McIntosh, Robert Becho, Google Inc. and Veronika Zenz 5 | * 6 | * This program is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU General Public License 8 | * as published by the Free Software Foundation; either version 2 9 | * of the License, or (at your option) any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 
15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, write to the Free Software 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 19 | */ 20 | #include 21 | #include 22 | 23 | #include "ewok.h" 24 | 25 | #if defined(__linux__) 26 | # include 27 | #elif defined(__FreeBSD__) || defined(__NetBSD__) 28 | # include 29 | #elif defined(__OpenBSD__) 30 | # include 31 | # define be16toh(x) betoh16(x) 32 | # define be32toh(x) betoh32(x) 33 | # define be64toh(x) betoh64(x) 34 | #endif 35 | 36 | int ewah_serialize(struct ewah_bitmap *self, int fd) 37 | { 38 | size_t i; 39 | eword_t dump[2048]; 40 | const size_t words_per_dump = sizeof(dump) / sizeof(eword_t); 41 | 42 | /* 32 bit -- bit size fr the map */ 43 | uint32_t bitsize = htobe32((uint32_t)self->bit_size); 44 | if (write(fd, &bitsize, 4) != 4) 45 | return -1; 46 | 47 | /** 32 bit -- number of compressed 64-bit words */ 48 | uint32_t word_count = htobe32((uint32_t)self->buffer_size); 49 | if (write(fd, &word_count, 4) != 4) 50 | return -1; 51 | 52 | /** 64 bit x N -- compressed words */ 53 | const eword_t *buffer = self->buffer; 54 | size_t words_left = self->buffer_size; 55 | 56 | while (words_left >= words_per_dump) { 57 | for (i = 0; i < words_per_dump; ++i, ++buffer) 58 | dump[i] = htobe64(*buffer); 59 | 60 | if (write(fd, dump, sizeof(dump)) != sizeof(dump)) 61 | return -1; 62 | 63 | words_left -= words_per_dump; 64 | } 65 | 66 | if (words_left) { 67 | for (i = 0; i < words_left; ++i, ++buffer) 68 | dump[i] = htobe64(*buffer); 69 | 70 | if (write(fd, dump, words_left * 8) != words_left * 8) 71 | return -1; 72 | } 73 | 74 | /** 32 bit -- position for the RLW */ 75 | uint32_t rlw_pos = (uint8_t*)self->rlw - (uint8_t *)self->buffer; 76 | rlw_pos = htobe32(rlw_pos / sizeof(eword_t)); 77 | 78 | if (write(fd, &rlw_pos, 4) != 4) 79 | return -1; 80 | 81 | return 0; 82 | } 83 | 84 | int ewah_deserialize(struct ewah_bitmap 
*self, int fd) 85 | { 86 | size_t i; 87 | eword_t dump[2048]; 88 | const size_t words_per_dump = sizeof(dump) / sizeof(eword_t); 89 | 90 | /* 32 bit -- bit size fr the map */ 91 | uint32_t bitsize; 92 | if (read(fd, &bitsize, 4) != 4) 93 | return -1; 94 | 95 | self->bit_size = (size_t)be32toh(bitsize); 96 | 97 | /** 32 bit -- number of compressed 64-bit words */ 98 | uint32_t word_count; 99 | if (read(fd, &word_count, 4) != 4) 100 | return -1; 101 | 102 | self->buffer_size = (size_t)be32toh(word_count); 103 | self->buffer = ewah_realloc(self->buffer, self->buffer_size * sizeof(eword_t)); 104 | 105 | if (!self->buffer) 106 | return -1; 107 | 108 | /** 64 bit x N -- compressed words */ 109 | eword_t *buffer = self->buffer; 110 | size_t words_left = self->buffer_size; 111 | 112 | while (words_left >= words_per_dump) { 113 | if (read(fd, dump, sizeof(dump)) != sizeof(dump)) 114 | return -1; 115 | 116 | for (i = 0; i < words_per_dump; ++i, ++buffer) 117 | *buffer = be64toh(dump[i]); 118 | 119 | words_left -= words_per_dump; 120 | } 121 | 122 | if (words_left) { 123 | if (read(fd, dump, words_left * 8) != words_left * 8) 124 | return -1; 125 | 126 | for (i = 0; i < words_left; ++i, ++buffer) 127 | *buffer = be64toh(dump[i]); 128 | } 129 | 130 | /** 32 bit -- position for the RLW */ 131 | uint32_t rlw_pos; 132 | if (read(fd, &rlw_pos, 4) != 4) 133 | return -1; 134 | 135 | self->rlw = self->buffer + be32toh(rlw_pos); 136 | 137 | return 0; 138 | } 139 | -------------------------------------------------------------------------------- /ewah_rlw.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013, GitHub, Inc 3 | * Copyright 2009-2013, Daniel Lemire, Cliff Moon, 4 | * David McIntosh, Robert Becho, Google Inc. 
and Veronika Zenz 5 | * 6 | * This program is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU General Public License 8 | * as published by the Free Software Foundation; either version 2 9 | * of the License, or (at your option) any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, write to the Free Software 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 19 | */ 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | #include "ewok.h" 26 | #include "ewok_rlw.h" 27 | 28 | static inline bool next_word(struct rlw_iterator *it) 29 | { 30 | if (it->pointer >= it->size) 31 | return false; 32 | 33 | it->rlw.word = &it->buffer[it->pointer]; 34 | it->pointer += rlw_get_literal_words(it->rlw.word) + 1; 35 | 36 | it->rlw.literal_words = rlw_get_literal_words(it->rlw.word); 37 | it->rlw.running_len = rlw_get_running_len(it->rlw.word); 38 | it->rlw.running_bit = rlw_get_run_bit(it->rlw.word); 39 | it->rlw.literal_word_offset = 0; 40 | 41 | return true; 42 | } 43 | 44 | void rlwit_init(struct rlw_iterator *it, struct ewah_bitmap *bitmap) 45 | { 46 | it->buffer = bitmap->buffer; 47 | it->size = bitmap->buffer_size; 48 | it->pointer = 0; 49 | 50 | next_word(it); 51 | 52 | it->literal_word_start = rlwit_literal_words(it) + it->rlw.literal_word_offset; 53 | } 54 | 55 | void rlwit_discard_first_words(struct rlw_iterator *it, size_t x) 56 | { 57 | while (x > 0) { 58 | size_t discard; 59 | 60 | if (it->rlw.running_len > x) { 61 | it->rlw.running_len -= x; 62 | return; 63 | } 64 | 65 | x -= it->rlw.running_len; 66 | it->rlw.running_len = 0; 67 | 68 | 
discard = (x > it->rlw.literal_words) ? it->rlw.literal_words : x; 69 | 70 | it->literal_word_start += discard; 71 | it->rlw.literal_words -= discard; 72 | x -= discard; 73 | 74 | if (x > 0 || rlwit_word_size(it) == 0) { 75 | if (!next_word(it)) 76 | break; 77 | 78 | it->literal_word_start = 79 | rlwit_literal_words(it) + it->rlw.literal_word_offset; 80 | } 81 | } 82 | } 83 | 84 | size_t rlwit_discharge( 85 | struct rlw_iterator *it, struct ewah_bitmap *out, size_t max, bool negate) 86 | { 87 | size_t index = 0; 88 | 89 | while (index < max && rlwit_word_size(it) > 0) { 90 | size_t pd, pl = it->rlw.running_len; 91 | 92 | if (index + pl > max) { 93 | pl = max - index; 94 | } 95 | 96 | ewah_add_empty_words(out, it->rlw.running_bit ^ negate, pl); 97 | index += pl; 98 | 99 | pd = it->rlw.literal_words; 100 | if (pd + index > max) { 101 | pd = max - index; 102 | } 103 | 104 | ewah_add_dirty_words(out, 105 | it->buffer + it->literal_word_start, pd, negate); 106 | 107 | rlwit_discard_first_words(it, pd + pl); 108 | index += pd; 109 | } 110 | 111 | return index; 112 | } 113 | 114 | void rlwit_discharge_empty(struct rlw_iterator *it, struct ewah_bitmap *out) 115 | { 116 | while (rlwit_word_size(it) > 0) { 117 | ewah_add_empty_words(out, false, rlwit_word_size(it)); 118 | rlwit_discard_first_words(it, rlwit_word_size(it)); 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /ewok.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013, GitHub, Inc 3 | * Copyright 2009-2013, Daniel Lemire, Cliff Moon, 4 | * David McIntosh, Robert Becho, Google Inc. and Veronika Zenz 5 | * 6 | * This program is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU General Public License 8 | * as published by the Free Software Foundation; either version 2 9 | * of the License, or (at your option) any later version. 
10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, write to the Free Software 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 19 | */ 20 | #ifndef __EWOK_BITMAP_C__ 21 | #define __EWOK_BITMAP_C__ 22 | 23 | #include 24 | #include 25 | 26 | #ifndef ewah_malloc 27 | # define ewah_malloc malloc 28 | #endif 29 | #ifndef ewah_realloc 30 | # define ewah_realloc realloc 31 | #endif 32 | #ifndef ewah_calloc 33 | # define ewah_calloc calloc 34 | #endif 35 | 36 | typedef uint64_t eword_t; 37 | #define BITS_IN_WORD (sizeof(eword_t) * 8) 38 | 39 | struct ewah_bitmap { 40 | eword_t *buffer; 41 | size_t buffer_size; 42 | size_t alloc_size; 43 | size_t bit_size; 44 | eword_t *rlw; 45 | }; 46 | 47 | /** 48 | * Allocate a new EWAH Compressed bitmap 49 | */ 50 | struct ewah_bitmap *ewah_new(void); 51 | 52 | /** 53 | * Clear all the bits in the bitmap. Does not free or resize 54 | * memory. 55 | */ 56 | void ewah_clear(struct ewah_bitmap *bitmap); 57 | 58 | /** 59 | * Free all the memory of the bitmap 60 | */ 61 | void ewah_free(struct ewah_bitmap *bitmap); 62 | 63 | /** 64 | * Load a bitmap from a file descriptor. An empty `ewah_bitmap` instance 65 | * must have been allocated beforehand. 66 | * 67 | * The fd must be open in read mode. 68 | * 69 | * Returns: 0 on success, -1 if a reading error occured (check errno) 70 | */ 71 | int ewah_deserialize(struct ewah_bitmap *self, int fd); 72 | 73 | /** 74 | * Dump an existing bitmap to a file descriptor. The bitmap 75 | * is dumped in compressed form, with the following structure: 76 | * 77 | * | bit_count | number_of_words | words... 
| rlw_position 78 | * 79 | * The fd must be open in write mode. 80 | * 81 | * Returns: 0 on success, -1 if a writing error occured (check errno) 82 | */ 83 | int ewah_serialize(struct ewah_bitmap *self, int fd); 84 | 85 | /** 86 | * Logical not (bitwise negation) in-place on the bitmap 87 | * 88 | * This operation is linear time based on the size of the bitmap. 89 | */ 90 | void ewah_not(struct ewah_bitmap *self); 91 | 92 | /** 93 | * Call the given callback with the position of every single bit 94 | * that has been set on the bitmap. 95 | * 96 | * This is an efficient operation that does not fully decompress 97 | * the bitmap. 98 | */ 99 | void ewah_each_bit(struct ewah_bitmap *self, void (*callback)(size_t, void*), void *payload); 100 | 101 | /** 102 | * Set a given bit on the bitmap. 103 | * 104 | * The bit at position `pos` will be set to true. Because of the 105 | * way that the bitmap is compressed, a set bit cannot be unset 106 | * later on. 107 | * 108 | * Furthermore, since the bitmap uses streaming compression, bits 109 | * can only set incrementally. 110 | * 111 | * E.g. 112 | * ewah_set(bitmap, 1); // ok 113 | * ewah_set(bitmap, 76); // ok 114 | * ewah_set(bitmap, 77); // ok 115 | * ewah_set(bitmap, 8712800127); // ok 116 | * ewah_set(bitmap, 25); // failed, assert raised 117 | */ 118 | void ewah_set(struct ewah_bitmap *self, size_t i); 119 | 120 | /** 121 | * Add a stream of empty words to the bitstream 122 | * 123 | * This is an internal operation used to efficiently generate 124 | * compressed bitmaps. 125 | */ 126 | size_t ewah_add_empty_words(struct ewah_bitmap *self, bool v, size_t number); 127 | 128 | struct ewah_iterator { 129 | const eword_t *buffer; 130 | size_t buffer_size; 131 | 132 | size_t pointer; 133 | eword_t compressed, literals; 134 | eword_t rl, lw; 135 | bool b; 136 | }; 137 | 138 | /** 139 | * Initialize a new iterator to run through the bitmap in uncompressed form. 140 | * 141 | * The iterator can be stack allocated. 
The underlying bitmap must not be freed 142 | * before the iteration is over. 143 | * 144 | * E.g. 145 | * 146 | * struct ewah_bitmap *bitmap = ewah_new(); 147 | * struct ewah_iterator it; 148 | * 149 | * ewah_iterator_init(&it, bitmap); 150 | */ 151 | void ewah_iterator_init(struct ewah_iterator *it, struct ewah_bitmap *parent); 152 | 153 | /** 154 | * Yield every single word in the bitmap in uncompressed form. This is: 155 | * yield single words (32-64 bits) where each bit represents an actual 156 | * bit from the bitmap. 157 | * 158 | * Return: true if a word was yield, false if there are no words left 159 | */ 160 | bool ewah_iterator_next(eword_t *next, struct ewah_iterator *it); 161 | 162 | void ewah_or( 163 | struct ewah_bitmap *bitmap_i, 164 | struct ewah_bitmap *bitmap_j, 165 | struct ewah_bitmap *out); 166 | 167 | void ewah_and_not( 168 | struct ewah_bitmap *bitmap_i, 169 | struct ewah_bitmap *bitmap_j, 170 | struct ewah_bitmap *out); 171 | 172 | void ewah_xor( 173 | struct ewah_bitmap *bitmap_i, 174 | struct ewah_bitmap *bitmap_j, 175 | struct ewah_bitmap *out); 176 | 177 | void ewah_and( 178 | struct ewah_bitmap *bitmap_i, 179 | struct ewah_bitmap *bitmap_j, 180 | struct ewah_bitmap *out); 181 | 182 | void ewah_dump(struct ewah_bitmap *bitmap); 183 | 184 | void ewah_add_dirty_words( 185 | struct ewah_bitmap *self, const eword_t *buffer, size_t number, bool negate); 186 | 187 | /** 188 | * Uncompressed, old-school bitmap that can be efficiently compressed 189 | * into an `ewah_bitmap`. 
190 | */ 191 | struct bitmap { 192 | eword_t *words; 193 | size_t word_alloc; 194 | }; 195 | 196 | struct bitmap *bitmap_new(void); 197 | void bitmap_set(struct bitmap *self, size_t pos); 198 | void bitmap_clear(struct bitmap *self, size_t pos); 199 | bool bitmap_get(struct bitmap *self, size_t pos); 200 | 201 | struct ewah_bitmap * bitmap_to_ewah(struct bitmap *bitmap); 202 | struct bitmap *ewah_to_bitmap(struct ewah_bitmap *ewah); 203 | 204 | #endif 205 | -------------------------------------------------------------------------------- /ewok_rlw.h: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013, GitHub, Inc 3 | * Copyright 2009-2013, Daniel Lemire, Cliff Moon, 4 | * David McIntosh, Robert Becho, Google Inc. and Veronika Zenz 5 | * 6 | * This program is free software; you can redistribute it and/or 7 | * modify it under the terms of the GNU General Public License 8 | * as published by the Free Software Foundation; either version 2 9 | * of the License, or (at your option) any later version. 10 | * 11 | * This program is distributed in the hope that it will be useful, 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 | * GNU General Public License for more details. 15 | * 16 | * You should have received a copy of the GNU General Public License 17 | * along with this program; if not, write to the Free Software 18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
19 | */ 20 | #ifndef __EWOK_RLW_H__ 21 | #define __EWOK_RLW_H__ 22 | 23 | #define RLW_RUNNING_BITS (sizeof(eword_t) * 4) 24 | #define RLW_LITERAL_BITS (sizeof(eword_t) * 8 - 1 - RLW_RUNNING_BITS) 25 | 26 | #define RLW_LARGEST_RUNNING_COUNT (((eword_t)1 << RLW_RUNNING_BITS) - 1) 27 | #define RLW_LARGEST_LITERAL_COUNT (((eword_t)1 << RLW_LITERAL_BITS) - 1) 28 | 29 | #define RLW_LARGEST_RUNNING_COUNT_SHIFT (RLW_LARGEST_RUNNING_COUNT << 1) 30 | 31 | #define RLW_RUNNING_LEN_PLUS_BIT (((eword_t)1 << (RLW_RUNNING_BITS + 1)) - 1) 32 | 33 | static bool rlw_get_run_bit(const eword_t *word) 34 | { 35 | return *word & (eword_t)1; 36 | } 37 | 38 | static inline void rlw_set_run_bit(eword_t *word, bool b) 39 | { 40 | if (b) { 41 | *word |= (eword_t)1; 42 | } else { 43 | *word &= (eword_t)(~1); 44 | } 45 | } 46 | 47 | static inline void rlw_xor_run_bit(eword_t *word) 48 | { 49 | if (*word & 1) { 50 | *word &= (eword_t)(~1); 51 | } else { 52 | *word |= (eword_t)1; 53 | } 54 | } 55 | 56 | static inline void rlw_set_running_len(eword_t *word, eword_t l) 57 | { 58 | *word |= RLW_LARGEST_RUNNING_COUNT_SHIFT; 59 | *word &= (l << 1) | (~RLW_LARGEST_RUNNING_COUNT_SHIFT); 60 | } 61 | 62 | static inline eword_t rlw_get_running_len(const eword_t *word) 63 | { 64 | return (*word >> 1) & RLW_LARGEST_RUNNING_COUNT; 65 | } 66 | 67 | static inline eword_t rlw_get_literal_words(const eword_t *word) 68 | { 69 | return *word >> (1 + RLW_RUNNING_BITS); 70 | } 71 | 72 | static inline void rlw_set_literal_words(eword_t *word, eword_t l) 73 | { 74 | *word |= ~RLW_RUNNING_LEN_PLUS_BIT; 75 | *word &= (l << (RLW_RUNNING_BITS + 1)) | RLW_RUNNING_LEN_PLUS_BIT; 76 | } 77 | 78 | static inline eword_t rlw_size(const eword_t *self) 79 | { 80 | return rlw_get_running_len(self) + rlw_get_literal_words(self); 81 | } 82 | 83 | struct rlw_iterator { 84 | const eword_t *buffer; 85 | size_t size; 86 | size_t pointer; 87 | size_t literal_word_start; 88 | 89 | struct { 90 | const eword_t *word; 91 | int literal_words; 
92 | int running_len; 93 | int literal_word_offset; 94 | int running_bit; 95 | } rlw; 96 | }; 97 | 98 | void rlwit_init(struct rlw_iterator *it, struct ewah_bitmap *bitmap); 99 | void rlwit_discard_first_words(struct rlw_iterator *it, size_t x); 100 | size_t rlwit_discharge( 101 | struct rlw_iterator *it, struct ewah_bitmap *out, size_t max, bool negate); 102 | void rlwit_discharge_empty(struct rlw_iterator *it, struct ewah_bitmap *out); 103 | 104 | static inline size_t rlwit_word_size(struct rlw_iterator *it) 105 | { 106 | return it->rlw.running_len + it->rlw.literal_words; 107 | } 108 | 109 | static inline size_t rlwit_literal_words(struct rlw_iterator *it) 110 | { 111 | return it->pointer - it->rlw.literal_words; 112 | } 113 | 114 | #endif 115 | 116 | -------------------------------------------------------------------------------- /test/test_logical.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "ewok.h" 4 | 5 | static void cb__blowup_test(size_t pos, void *payload) 6 | { 7 | struct bitmap *bm = payload; 8 | bitmap_set(bm, pos); 9 | } 10 | 11 | static void verify_blowup(struct ewah_bitmap *ewah, struct bitmap *blowup) 12 | { 13 | struct bitmap *aux = bitmap_new(); 14 | size_t i; 15 | ewah_each_bit(ewah, &cb__blowup_test, aux); 16 | 17 | for (i = 0; i < aux->word_alloc; ++i) { 18 | if (aux->words[i] != blowup->words[i]) { 19 | fprintf(stderr, "[%zu / %zu] %016llx vs %016llx ## FAIL \n", 20 | i, aux->word_alloc, 21 | (unsigned long long)aux->words[i], 22 | (unsigned long long)blowup->words[i]); 23 | exit(-1); 24 | } 25 | 26 | } 27 | 28 | bitmap_free(aux); 29 | } 30 | 31 | static size_t op_xor(size_t a, size_t b) 32 | { 33 | return a ^ b; 34 | } 35 | 36 | static size_t op_and(size_t a, size_t b) 37 | { 38 | return a & b; 39 | } 40 | 41 | static size_t op_or(size_t a, size_t b) 42 | { 43 | return a | b; 44 | } 45 | 46 | static size_t op_andnot(size_t a, size_t b) 47 | { 48 | return a & ~b; 
49 | } 50 | 51 | static bool verify_operation( 52 | struct ewah_bitmap *_a, struct ewah_bitmap *_b, 53 | struct ewah_bitmap *_result, size_t (*op)(size_t, size_t)) 54 | { 55 | struct bitmap *a = ewah_to_bitmap(_a); 56 | struct bitmap *b = ewah_to_bitmap(_b); 57 | struct bitmap *result = ewah_to_bitmap(_result); 58 | size_t i; 59 | bool ok = true; 60 | 61 | verify_blowup(_a, a); 62 | verify_blowup(_b, b); 63 | verify_blowup(_result, result); 64 | 65 | fprintf(stderr, "%zu ", _result->bit_size / BITS_IN_WORD); 66 | 67 | for (i = 0; i < _result->bit_size / BITS_IN_WORD; ++i) { 68 | size_t r = op(a->words[i], b->words[i]); 69 | 70 | if (r != result->words[i]) { 71 | fprintf(stderr, "\nMiss [%zu / %zu] GOT %016llX EXPECT %016llX\n", 72 | i, result->word_alloc, 73 | (unsigned long long)r, 74 | (unsigned long long)result->words[i] 75 | ); 76 | ok = false; 77 | break; 78 | } 79 | } 80 | 81 | bitmap_free(a); 82 | bitmap_free(b); 83 | bitmap_free(result); 84 | 85 | return ok; 86 | } 87 | 88 | static void cb__test_print(size_t pos, void *p) 89 | { 90 | printf("%zu, ", pos); 91 | } 92 | 93 | static void print_bitmap(const char *name, struct ewah_bitmap *bitmap) 94 | { 95 | printf("%s = {", name); 96 | ewah_each_bit(bitmap, &cb__test_print, NULL); 97 | printf("};\n\n"); 98 | } 99 | 100 | static struct ewah_bitmap *generate_bitmap(size_t max_size) 101 | { 102 | static const size_t BIT_CHANCE = 50; 103 | 104 | struct ewah_bitmap *bitmap = ewah_new(); 105 | size_t i; 106 | 107 | for (i = 0; i < max_size; ++i) { 108 | if (rand() % 100 <= BIT_CHANCE) 109 | ewah_set(bitmap, i); 110 | } 111 | 112 | return bitmap; 113 | } 114 | 115 | static void test_for_size(size_t size) 116 | { 117 | struct ewah_bitmap *a = generate_bitmap(size); 118 | struct ewah_bitmap *b = generate_bitmap(size); 119 | struct ewah_bitmap *result = ewah_new(); 120 | size_t i; 121 | 122 | struct { 123 | const char *name; 124 | void (*generate)(struct ewah_bitmap *, struct ewah_bitmap *, struct ewah_bitmap *); 125 | 
size_t (*check)(size_t, size_t); 126 | } tests[] = { 127 | {"or", &ewah_or, &op_or}, 128 | {"xor", &ewah_xor, &op_xor}, 129 | {"and", &ewah_and, &op_and}, 130 | {"and-not", &ewah_and_not, &op_andnot} 131 | }; 132 | 133 | for (i = 0; i < sizeof(tests)/sizeof(tests[0]); ++i) { 134 | fprintf(stderr, "'%s' in %zu bits... ", tests[i].name, size); 135 | 136 | tests[i].generate(a, b, result); 137 | 138 | if (verify_operation(a, b, result, tests[i].check)) 139 | fprintf(stderr, "OK\n"); 140 | 141 | ewah_clear(result); 142 | } 143 | 144 | ewah_free(a); 145 | ewah_free(b); 146 | ewah_free(result); 147 | } 148 | 149 | int main(int argc, char *argv[]) 150 | { 151 | size_t i; 152 | srand(time(NULL)); 153 | 154 | for (i = 8; i < 30; ++i) { 155 | test_for_size((size_t)1 << i); 156 | } 157 | 158 | return 0; 159 | } 160 | --------------------------------------------------------------------------------