├── _config.yml ├── img └── cef.png ├── examples ├── empty.bloom ├── hashlookup-sha1.yar └── hashlookup-add-sha1.yar ├── libyara ├── fleur │ ├── fnv.h │ ├── fnv.c │ ├── fleur.h │ └── fleur.c └── modules │ └── araygrass │ └── araygrass.c ├── .gitignore ├── LICENSE └── README.md /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /img/cef.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hashlookup/a-ray-grass/HEAD/img/cef.png -------------------------------------------------------------------------------- /examples/empty.bloom: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hashlookup/a-ray-grass/HEAD/examples/empty.bloom -------------------------------------------------------------------------------- /examples/hashlookup-sha1.yar: -------------------------------------------------------------------------------- 1 | import "araygrass" 2 | import "hash" 3 | 4 | rule Hashlookup 5 | { 6 | condition: 7 | araygrass.check_string(hash.sha1(0, filesize), 1) == 1 8 | } 9 | -------------------------------------------------------------------------------- /examples/hashlookup-add-sha1.yar: -------------------------------------------------------------------------------- 1 | import "araygrass" 2 | import "hash" 3 | 4 | rule Hashlookup 5 | { 6 | condition: 7 | araygrass.add_string(hash.sha1(0, filesize), 1) == 0 8 | } 9 | -------------------------------------------------------------------------------- /libyara/fleur/fnv.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | // Same values as in flor / bloom 4 | #define FNV_PRIME ((uint64_t)1099511628211) 5 | #define FNV_OFFSET ((uint64_t)14695981039346656037UL) 6 | 7 | typedef union { 8 | 
uint64_t h; 9 | unsigned char hexrepr[8]; 10 | }fnvhash; 11 | 12 | uint64_t fnv1(char *buf, size_t buf_size); 13 | void getDigest(fnvhash * fh, char * str); -------------------------------------------------------------------------------- /libyara/fleur/fnv.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "fnv.h" 4 | 5 | uint64_t fnv1(char *buf, size_t buf_size) { 6 | uint64_t h = FNV_OFFSET; 7 | for (size_t i = 0; i < buf_size; i++) { 8 | h *= FNV_PRIME; 9 | h ^= buf[i]; 10 | } 11 | return h; 12 | } 13 | 14 | 15 | void getDigest(fnvhash * fh, char * str){ 16 | sprintf(str, "%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", 17 | fh->hexrepr[7], 18 | fh->hexrepr[6], 19 | fh->hexrepr[5], 20 | fh->hexrepr[4], 21 | fh->hexrepr[3], 22 | fh->hexrepr[2], 23 | fh->hexrepr[1], 24 | fh->hexrepr[0] 25 | ); 26 | } 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | 10 | # Linker output 11 | *.ilk 12 | *.map 13 | *.exp 14 | 15 | # Precompiled Headers 16 | *.gch 17 | *.pch 18 | 19 | # Libraries 20 | *.lib 21 | *.a 22 | *.la 23 | *.lo 24 | 25 | # Shared objects (inc. Windows DLLs) 26 | *.dll 27 | *.so 28 | *.so.* 29 | *.dylib 30 | 31 | # Executables 32 | *.exe 33 | *.out 34 | *.app 35 | *.i*86 36 | *.x86_64 37 | *.hex 38 | 39 | # Debug files 40 | *.dSYM/ 41 | *.su 42 | *.idb 43 | *.pdb 44 | 45 | # Kernel Module Compile Results 46 | *.mod* 47 | *.cmd 48 | .tmp_versions/ 49 | modules.order 50 | Module.symvers 51 | Mkfile.old 52 | dkms.conf 53 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2022, Jean-Louis Huynen 4 | All rights reserved. 
5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /libyara/fleur/fleur.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | typedef struct header { 6 | uint64_t version; 7 | //desired maximum number of elements 8 | uint64_t n; 9 | //desired false positive probability 10 | double p; 11 | //number of hash functions 12 | uint64_t k; 13 | //number of bits 14 | uint64_t m; 15 | //number of elements 16 | uint64_t N; 17 | } header; 18 | 19 | typedef struct BloomFilter { 20 | // version, needed for serialization 21 | uint64_t version; 22 | // datasize, needed for serialization 23 | uint64_t datasize; 24 | 25 | // bloom filter file header 26 | header h; 27 | 28 | //number of 64-bit integers (generated automatically) 29 | uint64_t M; 30 | 31 | // bit array - dynamic 32 | uint64_t *v; 33 | 34 | // arbitrary data that we can attach to the filter - dynamic 35 | unsigned char *Data; 36 | 37 | // has the bloom filter been modified? 
38 | int modified; 39 | // error on instanciation 40 | int error; 41 | 42 | } BloomFilter; 43 | 44 | static const uint64_t m = 18446744073709551557LLU; 45 | static const uint64_t g = 18446744073709550147LLU; 46 | 47 | struct BloomFilter fleur_initialize(uint64_t n, double p, char *buf); 48 | struct BloomFilter fleur_bloom_filter_from_file(FILE* f); 49 | 50 | int fleur_add(BloomFilter * bf, char *buf, size_t buf_size); 51 | int fleur_check(BloomFilter * bf, char *buf, size_t buf_size); 52 | void fleur_set_data(BloomFilter * bf, char* buf, size_t buf_size ); 53 | void fleur_bloom_filter_to_file(BloomFilter * bf, FILE* of); 54 | void fleur_fingerprint(BloomFilter * bf, char *buf, size_t buf_size, uint64_t **fingerprint); 55 | 56 | void fleur_print_header(header * h); 57 | void fleur_print_filter(BloomFilter * bf); 58 | int fleur_check_header(header * h); -------------------------------------------------------------------------------- /libyara/modules/araygrass/araygrass.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2014. The YARA Authors. All Rights Reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation and/or 12 | other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software without 16 | specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #include 31 | #include 32 | #include 33 | 34 | #define MODULE_NAME araygrass 35 | 36 | // #define BF_PATH "/home/jlouis/Downloads/hashlookup-full.bloom" 37 | #define BF_PATH_IN "/home/jlouis/Git/yara/test-in.bloom" 38 | // If a filter already exist at this location, it will be overwritten 39 | #define BF_PATH_OUT "/home/jlouis/Git/yara/test-out.bloom" 40 | 41 | // BloomFilter global variable 42 | BloomFilter bf; 43 | 44 | char* strtoupper(char* s) { 45 | assert(s != NULL); 46 | 47 | char* p = s; 48 | while (*p != '\0') { 49 | *p = toupper(*p); 50 | p++; 51 | } 52 | 53 | return s; 54 | } 55 | 56 | define_function(check_string) 57 | { 58 | SIZED_STRING* s = sized_string_argument(1); 59 | int64_t topupperflag = integer_argument(2); 60 | int test = 0; 61 | if (topupperflag == (int64_t)1) { 62 | test = fleur_check(&bf, strtoupper(s->c_string), s->length); 63 | }else{ 64 | test = fleur_check(&bf, s->c_string, s->length); 65 | } 66 | 67 | return_integer(test); 68 | } 69 | 70 | define_function(add_string) 71 | { 72 | SIZED_STRING* s = sized_string_argument(1); 73 | int64_t topupperflag = integer_argument(2); 74 | int test = 0; 75 | if (topupperflag == 
(int64_t)1) { 76 | test = fleur_add(&bf, strtoupper(s->c_string), s->length); 77 | }else{ 78 | test = fleur_add(&bf, s->c_string, s->length); 79 | } 80 | 81 | return_integer(test); 82 | } 83 | 84 | int module_initialize(YR_MODULE* module) 85 | { 86 | FILE* in = fopen(BF_PATH_IN, "rb"); 87 | if (in == NULL) { 88 | exit(EXIT_FAILURE); 89 | } 90 | 91 | bf = fleur_bloom_filter_from_file(in); 92 | fclose(in); 93 | 94 | return ERROR_SUCCESS; 95 | } 96 | 97 | 98 | int module_finalize(YR_MODULE* module) 99 | { 100 | if(bf.modified == 1){ 101 | printf("saving\n"); 102 | FILE* out; 103 | out = fopen(BF_PATH_OUT, "wb"); 104 | if (out == NULL) { 105 | return EXIT_FAILURE; 106 | } 107 | fleur_bloom_filter_to_file(&bf, out); 108 | fclose(out); 109 | } 110 | free(bf.v); 111 | return ERROR_SUCCESS; 112 | } 113 | 114 | int module_load( 115 | YR_SCAN_CONTEXT* context, 116 | YR_OBJECT* module_object, 117 | void* module_data, 118 | size_t module_data_size) 119 | { 120 | 121 | return ERROR_SUCCESS; 122 | } 123 | 124 | 125 | int module_unload(YR_OBJECT* module_object) 126 | { 127 | return ERROR_SUCCESS; 128 | } 129 | 130 | begin_declarations; 131 | 132 | declare_function("check_string", "si", "i", check_string); 133 | declare_function("add_string", "si", "i", add_string); 134 | 135 | end_declarations; -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # a-ray-grass 2 | `a-ray-grass` is a yara module that provides support for DCSO format bloom filters in yara. In the context of [hashlookup](https://hashlookup.io), it allows quickly discard known files "pour séparer le grain de l\'ivraie". 
3 | 4 | # Installation 5 | ## Copy 6 | - Copy the folder `libyara/fleur` in `libyara` 7 | - Copy the folder `libyara/modules/araygrass` in `libyara/modules` 8 | 9 | ## Modify `libyara/Makefile.am` 10 | - Add `modules/araygrass/araygrass.c` to the `MODULES` variable: 11 | ``` 12 | MODULES += modules/araygrass/araygrass.c 13 | ``` 14 | - Add `fleur/fnv.c` and `fleur/fleur.c` to the `libyara_la_SOURCES` variable: 15 | ``` 16 | libyara_la_SOURCES = \ 17 | $(MODULES) \ 18 | grammar.y \ 19 | ... 20 | fleur/fnv.c \ 21 | fleur/fleur.c \ 22 | threading.c 23 | ``` 24 | 25 | ## Modify `/libyara/modules/module_list` 26 | - Append `MODULE(araygrass)` at the end of the file 27 | 28 | ## Finally modify `libyara/modules/araygrass/araygrass.c` 29 | There you will find two paths defined: 30 | 31 | - `BF_PATH_IN`: specify here the path to your input bloom filter, the filter against which variables will be checked. 32 | - `BF_PATH_OUT`: specify here the path where you wish to save the modified 33 | bloom filter after yara has finished its execution. It can be left empty if no 34 | modifications are made to the filter. 35 | 36 | It's totally ok to set the same path for both variables. 37 | 38 | # Compilation 39 | 40 | Go back to yara's root folder and `make` (followed by `sudo make install` if you wish this version of yara to replace your current version). 41 | 42 | # Functions 43 | a-ray-grass provides two functions, `check_string` and `add_string`. Both functions take two arguments: 44 | - a string to match against the bloom filter, 45 | - a flag to specify whether the string should be translated to uppercase before checking/adding. 46 | 47 | ## `check_string` 48 | `check_string` takes a string, the uppercase int flag, and returns an integer: 49 | - 1 if the string may be in the bloom filter (given your bloom filter's parameters), 50 | - 0 if the string is definitely not in the filter. 
51 | 52 | ## `add_string` 53 | `add_string` takes a string, the uppercase int flag, and returns an integer: 54 | - 0 if the string is likely already present in the filter, and therefore not added, 55 | - 1 if the string was definitely not present, but now it is. 56 | 57 | # Usage and use-cases 58 | Why would you use this module? I am glad you asked! 59 | 60 | ## Bloom filters 61 | This module is compatible with bloom filters generated by DCSO's tools: 62 | - [bloom](https://github.com/DCSO/bloom) (golang) 63 | - [flor](https://github.com/DCSO/flor) (python) 64 | As well as [fleur](https://github.com/hashlookup/fleur) (C) 65 | 66 | As a bloom filter is necessary, an empty one is provided in `examples/empty.bloom`. But you can create one with 67 | `bloom create test.bloom` for instance. 68 | 69 | The most useful public filter one can use is [hashlookup's](https://cra.circl.lu/hashlookup/hashlookup-full.bloom), which contains a lot of SHA1 hashes (in uppercase) of known files. 70 | 71 | ## Filtering known files 72 | Let's consider the following yara rule for instance: 73 | ``` 74 | import "araygrass" 75 | import "hash" 76 | 77 | rule HashlookupMatching 78 | { 79 | condition: 80 | araygrass.check_string(hash.sha1(0, filesize), 1) == 1 81 | } 82 | 83 | ``` 84 | In this instance, each file is fully hashed with SHA1, then checked against hashlookup's filter: 85 | ```shell 86 | $./yara hashlookup-sha1.yar -r /usr/bin 87 | Hashlookup /usr/bin/ctanify 88 | Hashlookup /usr/bin/qdoc 89 | Hashlookup /usr/bin/ps2pdfwr 90 | Hashlookup /usr/bin/ubuntu-security-status 91 | ... 92 | ``` 93 | One can recompile the module to point to the right filter but consider copying/linking to get more flexibility. 94 | 95 | ## Storing already processed files 96 | In the following example, we actually match against hashlookup, and add to the filter if we don't know the file. 
97 | ``` 98 | import "araygrass" 99 | import "hash" 100 | 101 | rule Hashlookup 102 | { 103 | condition: 104 | araygrass.check_string(hash.sha1(0, filesize), 1) == 1 105 | } 106 | ``` 107 | It's especially usefull to avoid bumping into the same file again, and can come handy when combined with other conditions ;) 108 | 109 | ## Partial hashing 110 | Remember that `yara`'s `hash` modules support hashing parts of file, therefore it is totatally doable to only store the hash of say the first 2K of each files. 111 | Hashlookup team is working on such dataset but it is not public ATM. 112 | 113 | # Acknowledgment 114 | 115 | ![](./img/cef.png) 116 | 117 | The project has been co-funded by CEF-TC-2020-2 - 2020-EU-IA-0260 - JTAN - Joint Threat Analysis Network. 118 | -------------------------------------------------------------------------------- /libyara/fleur/fleur.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "fnv.h" 7 | #include "fleur.h" 8 | 9 | // fleur_bloom_filter_to_file serializes a bloom filter to a file 10 | // it receives a file descriptor 11 | void fleur_bloom_filter_to_file(BloomFilter * bf, FILE* of){ 12 | bf->version = (uint64_t) 1; 13 | fwrite(&bf->version, sizeof(uint64_t), 1, of); 14 | fwrite(&bf->h.n, sizeof(uint64_t), 1, of); 15 | fwrite(&bf->h.p, sizeof(double), 1, of); 16 | fwrite(&bf->h.k, sizeof(uint64_t), 1, of); 17 | fwrite(&bf->h.m, sizeof(uint64_t), 1, of); 18 | fwrite(&bf->h.N, sizeof(uint64_t), 1, of); 19 | fwrite(bf->v, bf->M * sizeof(uint64_t), 1, of); 20 | fwrite(bf->Data, bf->datasize * sizeof(unsigned char), 1, of); 21 | } 22 | 23 | // fleur_set_data sets the data field of the bloom filter 24 | void fleur_set_data(BloomFilter * bf, char* buf, size_t buf_size ){ 25 | free(bf->Data); 26 | bf->Data = calloc(buf_size, sizeof(unsigned char)); 27 | bf->datasize = buf_size; 28 | memcpy(bf->Data, buf, buf_size); 29 | } 30 | 31 | // 
fleur_bloom_filter_from_file returns a pointer to a BloomFilter from a 32 | // file descriptor. 33 | struct BloomFilter fleur_bloom_filter_from_file(FILE* f){ 34 | BloomFilter bf; 35 | header h; 36 | 37 | size_t elements_read = fread(&h, sizeof(header), 1, f); 38 | if(elements_read == 0){ 39 | fprintf(stderr, "Error reading filter file.\n"); 40 | bf.error = 1; 41 | fclose(f); 42 | return bf; 43 | } 44 | 45 | if (fleur_check_header(&h) != 1){ 46 | fprintf(stderr, "Incoherent header.\n"); 47 | bf.error = 1; 48 | fclose(f); 49 | return bf; 50 | } 51 | 52 | bf.M = ceil(h.m / 64.0); 53 | 54 | // Get data size 55 | int err = fseek(f, 0, SEEK_END); 56 | if(err!=0){ 57 | fprintf(stderr, "Cannot seek in binary file.\n"); 58 | bf.error = 1; 59 | fclose(f); 60 | return bf; 61 | } 62 | long size = ftell(f); 63 | fseek(f, 48, SEEK_SET); 64 | 65 | if (bf.M <= (size - 48)){ 66 | 67 | bf.datasize = size - ceil((bf.M*64)/8) - 48; 68 | 69 | // Load bitarray 70 | bf.v = calloc(bf.M, sizeof(uint64_t)); 71 | elements_read = fread(bf.v, sizeof(uint64_t), bf.M, f); 72 | if(elements_read == 0){ 73 | fprintf(stderr, "Cannot load bitarray.\n"); 74 | bf.error = 1; 75 | fclose(f); 76 | return bf; 77 | } 78 | 79 | // Load remaining data 80 | if (bf.datasize > 0) { 81 | // keep one for adding the nullbyte 82 | bf.Data = calloc(bf.datasize + 1, sizeof(unsigned char)); 83 | elements_read = fread(bf.Data, sizeof(char), bf.datasize, f); 84 | if(elements_read == 0){ 85 | fprintf(stderr, "Cannot load bloom filter metadata.\n"); 86 | bf.error = 1; 87 | fclose(f); 88 | return bf; 89 | } 90 | bf.Data[bf.datasize] = '\0'; 91 | } 92 | } 93 | 94 | bf.modified = 0; 95 | bf.error = 0; 96 | 97 | bf.h = h; 98 | 99 | return bf; 100 | } 101 | 102 | // fleur_fingerprint returns the fingerprint of a given value, as an array of index 103 | // values. 
104 | void fleur_fingerprint(BloomFilter * bf, char *buf, size_t buf_size, uint64_t **fingerprint) { 105 | uint64_t* tmp = calloc(bf->h.k, sizeof(uint64_t)); 106 | uint64_t h = fnv1(buf, buf_size); 107 | uint64_t hn = h % m; 108 | for (uint64_t i = 0; i < bf->h.k; i++){ 109 | hn = (hn * g) % m; 110 | tmp[i] = (uint64_t)hn % bf->h.m; 111 | } 112 | 113 | free(*fingerprint); 114 | *fingerprint = tmp; 115 | } 116 | 117 | // fleur_add adds a byte array element to the Bloom filter. 118 | // return 0 when the value is likely already present in the filter 119 | // and -1 when the filter is full 120 | int fleur_add(BloomFilter * bf, char *buf, size_t buf_size) { 121 | if ((bf->h.N+1) <= bf->h.n){ 122 | uint64_t k, l; 123 | int newValue = 0; 124 | uint64_t* fp = calloc(bf->h.k, sizeof(uint64_t)); 125 | fleur_fingerprint(bf, buf, buf_size, &fp); 126 | for (uint64_t i = 0; i < bf->h.k; i++) { 127 | k = fp[i] / 64; 128 | l = fp[i] % 64; 129 | uint64_t v = (uint64_t)1 << l; 130 | if ((bf->v[k] & v) == 0) { 131 | newValue = 1; 132 | } 133 | bf->v[k] |= v; 134 | } 135 | if (newValue == 1) { 136 | bf->h.N++; 137 | bf->modified = 1; 138 | free(fp); 139 | return 1; 140 | }else{ 141 | free(fp); 142 | return 0; 143 | } 144 | }else{ 145 | return -1; 146 | } 147 | } 148 | 149 | // fleur_check returns 1 if the given value may be in the Bloom filter, 0 if it 150 | // is definitely not in it. 151 | int fleur_check(BloomFilter * bf, char *buf, size_t buf_size) { 152 | uint64_t k, l; 153 | uint64_t* fp = calloc(bf->h.k, sizeof(uint64_t)); 154 | fleur_fingerprint(bf, buf, buf_size, &fp); 155 | for (uint64_t i = 0; i < bf->h.k; i++){ 156 | k = fp[i] / 64; 157 | l = fp[i] % 64; 158 | uint64_t v = (uint64_t)1 << l; 159 | if ((bf->v[k] & v) == 0){ 160 | free(fp); 161 | return 0; 162 | } 163 | } 164 | free(fp); 165 | return 1; 166 | } 167 | 168 | // fleur_initialize creates a an empty Bloom filter with the given capacity (n) 169 | // and FP probability (p). 
170 | struct BloomFilter fleur_initialize(uint64_t n, double p, char *buf){ 171 | BloomFilter bf; 172 | 173 | uint64_t m = fabs(ceil((double)(n) * log(p) / pow(log(2.0), 2.0))); 174 | uint64_t k = ceil(log(2) * m / n ); 175 | 176 | bf.Data = (unsigned char*)buf; 177 | 178 | bf.M = ceil(m / 64.0); 179 | 180 | bf.v = calloc(bf.M, sizeof(uint64_t)); 181 | 182 | header h = {1, n, p, k, m, 0}; 183 | 184 | bf.h = h; 185 | 186 | bf.modified = 0; 187 | return bf; 188 | } 189 | 190 | // fleur_print_header prints a BloomFilter's header 191 | void fleur_print_header(header * h){ 192 | printf("Header details:\n version: %lu\n n: %lu \n p: %f\n k: %lu \n m: %lu \n N: %lu \n", 193 | h->version, 194 | h->n, 195 | h->p, 196 | h->k, 197 | h->m, 198 | h->N); 199 | } 200 | 201 | // fleur_check_printer check a BloomFilter's header for inconsistencies 202 | int fleur_check_header(header * h){ 203 | if (h->version != 1){ 204 | fprintf(stderr, "Current filter version not supported.\n"); 205 | return 0; 206 | } 207 | // 0111111111111111111111111111111111111111111111111111111111111111b 208 | uint64_t maxint = 9223372036854775807; 209 | if (h->k >= maxint){ 210 | fprintf(stderr, "value of k (number of hash functions) is too high.\n"); 211 | return 0; 212 | } 213 | if (h->p <= __DBL_EPSILON__){ 214 | fprintf(stderr, "p is too small.\n"); 215 | return 0; 216 | } 217 | if (h->p > 1){ 218 | fprintf(stderr, "p is more than one.\n"); 219 | return 0; 220 | } 221 | if (h->N > h->n){ 222 | fprintf(stderr, "incoherent filter.\n"); 223 | return 0; 224 | } 225 | uint64_t tmp_m = fabs(ceil((double)(h->n) * log(h->p) / pow(log(2.0), 2.0))); 226 | if (tmp_m != h->m){ 227 | fprintf(stderr, "incoherent filter.\n"); 228 | return 0; 229 | } 230 | uint64_t tmp_k = ceil(log(2) * h->m / h->n ); 231 | if (tmp_k != h->k){ 232 | fprintf(stderr, "incoherent filter values.\n"); 233 | return 0; 234 | } 235 | return 1; 236 | } 237 | 238 | // fleur_printer_filter prints a BloomFilter's details 239 | void 
fleur_print_filter(BloomFilter * bf){ 240 | printf("Filter details:\n n: %lu \n p: %f\n k: %lu \n m: %lu \n N: %lu \n M: %lu\n Data: %s.", 241 | bf->h.n, 242 | bf->h.p, 243 | bf->h.k, 244 | bf->h.m, 245 | bf->h.N, 246 | bf->M, 247 | bf->Data); 248 | } --------------------------------------------------------------------------------