├── .circleci └── config.yml ├── .gitignore ├── LICENSE ├── README.md ├── bitarray.nimble ├── bitarray ├── bitarray.nim ├── bloom_filter_demo.nim └── bloom_filter_varying_bit_demo.nim └── circle.yml /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | jobs: 4 | build: 5 | working_directory: ~/nim-bitarray 6 | docker: 7 | - image: nimlang/nim:0.19.2 8 | steps: 9 | - checkout 10 | - run: nim c bitarray/bitarray.nim 11 | - run: ./bitarray/bitarray 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | nimcache/* 2 | bitarray/bitarray 3 | bitarray/bloom_filter_demo 4 | bitarray/bloom_filter_varying_bit_demo 5 | bitarray/nimcache/* -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Nick Greenfield, Reference Genomics, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | nim-bitarray 2 | ============ 3 | 4 | [![Circle CI](https://circleci.com/gh/onecodex/nim-bitarray.svg?style=svg&circle-token=fe4c2508901f659e0f1a5a9a8ed418bf13290c6b)](https://circleci.com/gh/onecodex/nim-bitarray) 5 | 6 | Bitarray implementation in Nim 7 | 8 | An efficient bitarray implementation in Nim. Includes a demo Bloom filter in ~75 lines that also requires [murmur3](https://github.com/boydgreenfield/murmur3) >= v0.2.0. 9 | -------------------------------------------------------------------------------- /bitarray.nimble: -------------------------------------------------------------------------------- 1 | [Package] 2 | name = "bitarray" 3 | version = "0.6.0" 4 | author = "Nick Boyd Greenfield" 5 | description = "Efficient in-memory or mmap-backed bitarray implementation in Nim" 6 | license = "MIT" 7 | srcDir = "bitarray" 8 | 9 | [Deps] 10 | Requires: "nim >= 0.19.0" -------------------------------------------------------------------------------- /bitarray/bitarray.nim: --------------------------------------------------------------------------------

import memfiles
from os import nil
from strutils import `%`, formatFloat, ffDecimal, toBin
from random import rand, randomize
from times import nil


# Type declarations
type
  # Storage word of the array; every element holds
  # `sizeof(BitArrayScalar) * 8` bits.
  BitArrayScalar* = uint

type
  # Raised for invalid sizes, size/header mismatches on mmap files,
  # out-of-range indices (debug builds) and writes to read-only arrays.
  BitArrayError* = object of CatchableError
  BitArrayKind = enum inmem, mmap
  FlexArray = UncheckedArray[BitArrayScalar]
  BitArray* = ref object
    size_elements: int        # number of data elements (header excluded)
    size_bits*: int           # number of addressable bits
    size_specified*: int      # size the caller asked for (-1 if none)
    bitarray*: ptr FlexArray  # element 0 is the header, data starts at 1
    read_only: bool
    case kind: BitArrayKind
    of inmem:
      nil
    of mmap:
      mm_filehandle: MemFile


const ONE = BitArrayScalar(1)
const BITS_PER_ELEMENT = sizeof(BitArrayScalar) * 8


# Header -- this is useful for tracking/versioning
# downstream data structures built on BitArray
const HEADER_SIZE = 1                                          # in elements
const HEADER_BYTES = HEADER_SIZE * sizeof(BitArrayScalar)
const DEFAULT_HEADER = BitArrayScalar(0xFFFFFFFFFFFF0025'u64)  # 8 bytes


proc finalize_bitarray(a: BitArray) =
  ## GC finalizer. Releases the backing storage unless it has already been
  ## released (for example through `close`), so it never frees twice.
  if a.bitarray.isNil:
    return
  case a.kind
  of inmem:
    dealloc(a.bitarray)
  of mmap:
    a.mm_filehandle.close()
  a.bitarray = nil


proc close*(a: BitArray) =
  ## Unmaps and closes the file behind an mmap-backed array. In-memory
  ## arrays are left alone (their storage is freed by the finalizer).
  ## Calling `close` more than once is harmless.
  case a.kind
  of inmem:
    discard
  of mmap:
    if not a.bitarray.isNil:
      # Drop the pointer first so neither the finalizer nor a second
      # `close` call touches the mapping again.
      a.bitarray = nil
      a.mm_filehandle.close()


proc get_header*(ba: BitArray): BitArrayScalar =
  ## Gets the header of the BitArray object.
  ## This is defined to be a 1-length BitArrayScalar
  ## (uint by default).
  result = ba.bitarray[0]


proc check_bounds(ba: BitArray, first, last: int) {.inline.} =
  ## Debug-build bounds check shared by all accessors; compiled out
  ## when building with `-d:release`.
  when not defined(release):
    if first < 0 or last >= ba.size_bits:
      raise newException(BitArrayError, "Specified index is out of range.")
  else:
    discard


proc check_slice_width(first, last: int) {.inline.} =
  ## Debug-build check that a slice fits into one `BitArrayScalar`.
  ## A span of one extra index (e.g. `2..66`) is tolerated for backwards
  ## compatibility; the accessors then use the first `BITS_PER_ELEMENT`
  ## bits only.
  when not defined(release):
    if (last - first) > BITS_PER_ELEMENT:
      raise newException(BitArrayError,
                         "Only slices up to $1 bits are supported." %
                         $BITS_PER_ELEMENT)
  else:
    discard


proc create_bitarray*(size: int, header: BitArrayScalar = DEFAULT_HEADER): BitArray =
  ## Creates an in-memory bitarray holding at least `size` bits.
  ## The size is rounded *up* to a whole number of `BitArrayScalar`
  ## elements, so every index in `0 ..< size` is valid.
  ## Raises `BitArrayError` if `size` is smaller than one element.
  if size < BITS_PER_ELEMENT:
    raise newException(BitArrayError,
                       "Minimum size of a bitarray is $#" %
                       [$BITS_PER_ELEMENT])
  let n_elements = (size + BITS_PER_ELEMENT - 1) div BITS_PER_ELEMENT
  new(result, finalize_bitarray)
  result.kind = inmem
  result.bitarray = cast[ptr FlexArray](
    alloc0((n_elements + HEADER_SIZE) * sizeof(BitArrayScalar)))
  result.size_elements = n_elements
  result.size_bits = n_elements * BITS_PER_ELEMENT
  result.size_specified = size
  result.bitarray[0] = header


proc create_bitarray*(file: string, size: int = -1, header: BitArrayScalar = DEFAULT_HEADER,
                      read_only: bool = false, enforce_header: bool = true): BitArray =
  ## Creates an mmap-backed bitarray. If the specified file exists
  ## it will be opened, but an exception will be raised if the size
  ## is specified and does not match. If the file does not exist
  ## it will be created (which requires `size`).
  ##
  ## A negative `size` means "not specified". With `enforce_header` the
  ## header stored in an existing file must equal `header`.
  ## Raises `BitArrayError` on any of these mismatches; the mapping is
  ## released before the error is raised.
  let size_given = size >= 0
  if size_given and size < BITS_PER_ELEMENT:
    raise newException(BitArrayError,
                       "Minimum size of a bitarray is $#" %
                       [$BITS_PER_ELEMENT])
  # On-disk layout: one header element followed by the bits, rounded up
  # to whole bytes.
  let expected_bytes =
    if size_given: (size + 7) div 8 + HEADER_BYTES
    else: 0

  var mm_file: MemFile
  var new_file = false
  if os.fileExists(file):
    mm_file = open(file, mode = (if read_only: fmRead else: fmReadWrite),
                   mappedSize = -1)
    let actual_bytes = mm_file.size
    if size_given and actual_bytes != expected_bytes:
      mm_file.close()
      raise newException(BitArrayError, "Existing mmap file $# does not have the specified size $#. Size is $# instead." % [$file, $expected_bytes, $actual_bytes])
    if actual_bytes < HEADER_BYTES:
      mm_file.close()
      raise newException(BitArrayError, "Existing mmap file $# is too small to hold a bitarray header." % [$file])
  else:
    if not size_given:
      raise newException(BitArrayError, "No existing mmap file. Must specify size to create one.")
    mm_file = open(file, mode = fmReadWrite, newFileSize = expected_bytes)
    new_file = true

  new(result, finalize_bitarray)
  # NOTE(review): newer compilers restrict changing a discriminator after
  # construction -- confirm this assignment on the targeted Nim version.
  result.kind = mmap
  result.bitarray = cast[ptr FlexArray](mm_file.mem)
  # Everything after the header is payload.
  result.size_bits = mm_file.size * 8 - BITS_PER_ELEMENT * HEADER_SIZE
  result.size_elements = (mm_file.size - HEADER_BYTES +
                          sizeof(BitArrayScalar) - 1) div sizeof(BitArrayScalar)
  result.size_specified = size
  result.mm_filehandle = mm_file
  result.read_only = read_only
  if new_file:  # Only alter header on creation
    result.bitarray[0] = header
  elif enforce_header and get_header(result) != header:
    close(result)  # do not keep the mapping open for a rejected file
    raise newException(BitArrayError, "Headers do not match. Possible version mismatch on underlying bitarray.")


proc `[]=`*(ba: BitArray, index: int, val: bool) {.inline.} =
  ## Sets the bit at an index to be either 0 (false) or 1 (true).
  ## Raises `BitArrayError` for read-only arrays and, in debug builds,
  ## for out-of-range indices.
  check_bounds(ba, index, index)
  if ba.read_only:
    raise newException(BitArrayError, "Cannot write to a read-only array.")
  let i_element = HEADER_SIZE + index div BITS_PER_ELEMENT
  let bit = ONE shl BitArrayScalar(index mod BITS_PER_ELEMENT)
  if val:
    ba.bitarray[i_element] = ba.bitarray[i_element] or bit
  else:
    ba.bitarray[i_element] = ba.bitarray[i_element] and (not bit)


proc `[]`*(ba: BitArray, index: int): bool {.inline.} =
  ## Gets the bit at an index element (returns a bool).
  ## Raises `BitArrayError` for out-of-range indices in debug builds.
  check_bounds(ba, index, index)
  let i_element = HEADER_SIZE + index div BITS_PER_ELEMENT
  let i_offset = BitArrayScalar(index mod BITS_PER_ELEMENT)
  result = bool((ba.bitarray[i_element] shr i_offset) and ONE)


proc `[]`*(ba: BitArray, index: Slice): BitArrayScalar {.inline.} =
  ## Get the bits for a slice of the bitarray. Supports slice sizes
  ## up to the maximum element size (64 bits by default). Bit `index.a`
  ## ends up in the lowest bit of the result; bits above the slice width
  ## are zero. An empty slice yields 0.
  let first = int(index.a)
  let last = int(index.b)
  check_bounds(ba, first, last)
  check_slice_width(first, last)

  let width = min(last - first + 1, BITS_PER_ELEMENT)
  if width <= 0:
    return BitArrayScalar(0)

  let i_element = HEADER_SIZE + first div BITS_PER_ELEMENT
  let i_offset = first mod BITS_PER_ELEMENT
  result = ba.bitarray[i_element] shr BitArrayScalar(i_offset)
  if i_offset + width > BITS_PER_ELEMENT:
    # The slice continues in the next element. `i_offset` is > 0 here,
    # so the shift below is always smaller than the element width.
    let spill = ba.bitarray[i_element + 1] shl
                BitArrayScalar(BITS_PER_ELEMENT - i_offset)
    result = result or spill
  if width < BITS_PER_ELEMENT:
    # Drop everything beyond the requested upper bound.
    result = result and ((ONE shl BitArrayScalar(width)) - ONE)
  return result  # Explicit return kept from the original implementation


proc `[]=`*(ba: BitArray, index: Slice, val: BitArrayScalar) {.inline.} =
  ## Set the bits for a slice of the bitarray. Supports slice sizes
  ## up to the maximum element size (64 bits by default). Only the lowest
  ## `slice width` bits of `val` are used, so nothing outside the slice
  ## is touched.
  ## Note: This inserts using a bitwise-or, it will *not* overwrite previously
  ## set true values!
  let first = int(index.a)
  let last = int(index.b)
  check_bounds(ba, first, last)
  check_slice_width(first, last)

  if ba.read_only:
    raise newException(BitArrayError, "Cannot write to a read-only array.")

  let width = min(last - first + 1, BITS_PER_ELEMENT)
  if width <= 0:
    return

  var bits = val
  if width < BITS_PER_ELEMENT:
    bits = bits and ((ONE shl BitArrayScalar(width)) - ONE)

  let i_element = HEADER_SIZE + first div BITS_PER_ELEMENT
  let i_offset = first mod BITS_PER_ELEMENT
  ba.bitarray[i_element] = ba.bitarray[i_element] or
                           (bits shl BitArrayScalar(i_offset))
  if i_offset + width > BITS_PER_ELEMENT:
    # Upper part of the value lands in the next element; `i_offset` is > 0
    # here, so the shift is always smaller than the element width.
    let spill = bits shr BitArrayScalar(BITS_PER_ELEMENT - i_offset)
    ba.bitarray[i_element + 1] = ba.bitarray[i_element + 1] or spill


proc `$`*(ba: BitArray): string =
  ## Human-readable summary: number of bits, number of data elements
  ## (each a `BitArrayScalar`, header excluded) and whether the array
  ## lives in memory (as opposed to being mmap-backed).
  result = ("BitArray with $1 bits and $2 unique elements. In-memory?: $3." %
            [$ba.size_bits, $ba.size_elements, $(ba.kind == inmem)])


when isMainModule:
  echo("Testing bitarray.nim code.")
  when not defined(release):
    let n_tests: int = int(1e6)
    let n_bits: int = int(2e9)  # 250 MB, i.e., much larger than L3 cache
  else:
    let n_tests = int(1e8)
    let n_bits = int(2e9)

  # Bit pattern with only the lowest and the highest bit set.
  const LOW_AND_HIGH_BIT = BitArrayScalar(0x8000000000000001'u64)

  block:  # test specific slicing
    var ba = create_bitarray(64)
    ba[0] = true
    ba[2] = true
    ba[7] = true
    doAssert ba[0..63] == 133, "incorrect result: " & $ba[0..63]
    doAssert ba[0..4] == 5, "incorrect result: " & $ba[0..4]
    doAssert ba[1..4] == 2, "incorrect result: " & $ba[1..4]
    # Bits above the slice must never leak into the result
    ba[5] = true
    doAssert ba[1..4] == 2, "incorrect result: " & $ba[1..4]
    doAssert ba[7..7] == 1, "incorrect result: " & $ba[7..7]

  block:  # sizes are rounded up, never down
    let ba = create_bitarray(100)
    doAssert ba.size_bits == 128
    ba[99] = true
    doAssert ba[99]

  block:  # slices starting on an element boundary stay inside the slice
    let ba = create_bitarray(192)
    ba[64..128] = BitArrayScalar(1)
    doAssert ba[64]
    doAssert(not ba[128])
    doAssert ba[64..128] == 1
    ba[60..67] = BitArrayScalar(0xFFFF)  # only 8 bits may be written
    doAssert(not ba[59])
    doAssert(not ba[68])
    doAssert ba[60..67] == 0xFF


  var bitarray_a = create_bitarray(n_bits)
  bitarray_a[0] = true
  bitarray_a[1] = true
  bitarray_a[2] = true

  var bitarray_b = create_bitarray("/tmp/ba.mmap", size=n_bits)
  bitarray_b.bitarray[3] = 4

  # Test range lookups/inserts
  bitarray_a[65] = true
  doAssert bitarray_a[65]
  doAssert bitarray_a[2..66] == LOW_AND_HIGH_BIT

  bitarray_a[131] = true
  bitarray_a[194] = true
  doAssert bitarray_a[2..66] == bitarray_a[131..194]
  let slice_value = bitarray_a[131..194]
  bitarray_a[270..333] = slice_value
  bitarray_a[400..463] = LOW_AND_HIGH_BIT
  doAssert bitarray_a[131..194] == bitarray_a[270..333]
  doAssert bitarray_a[131..194] == bitarray_a[400..463]

  # Seed RNG
  randomize(2882)  # Seed the RNG
  var n_test_positions = newSeq[int](n_tests)

  for i in 0..(n_tests - 1):
    # `rand` is inclusive of its upper bound, valid indices end at n_bits - 1
    n_test_positions[i] = rand(n_bits - 1)

  # Timing tests
  var start_time, end_time: float
  start_time = times.cpuTime()
  for i in 0..(n_tests - 1):
    bitarray_a[n_test_positions[i]] = true
  end_time = times.cpuTime()
  echo("Took ", formatFloat(end_time - start_time, format = ffDecimal, precision = 4), " seconds to insert ", n_tests, " items (in-memory).")

  start_time = times.cpuTime()
  for i in 0..(n_tests - 1):
    bitarray_b[n_test_positions[i]] = true
  end_time = times.cpuTime()
  echo("Took ", formatFloat(end_time - start_time, format = ffDecimal, precision = 4), " seconds to insert ", n_tests, " items (mmap-backed).")

  start_time = times.cpuTime()
  for i in 0..(n_tests - 1):
    doAssert bitarray_a[n_test_positions[i]]
  end_time = times.cpuTime()
  echo("Took ", formatFloat(end_time - start_time, format = ffDecimal, precision = 4), " seconds to lookup ", n_tests, " items (in-memory).")

  start_time = times.cpuTime()
  for i in 0..(n_tests - 1):
    doAssert bitarray_b[n_test_positions[i]]
  end_time = times.cpuTime()
  echo("Took ", formatFloat(end_time - start_time, format = ffDecimal, precision = 4), " seconds to lookup ", n_tests, " items (mmap-backed).")

  # Attempt to reopen bitarray and write to it
  bitarray_b[0] = false
  bitarray_b.close()
  bitarray_b.close()  # closing twice must be harmless
  var bitarray_c = create_bitarray("/tmp/ba.mmap", size=n_bits, read_only=true)
  try:
    bitarray_c[0] = true
    doAssert false
  except BitArrayError:
    doAssert true
  doAssert bitarray_c[0] == false

  # Header testing; first assert get_header is default
  doAssert bitarray_a.get_header() == DEFAULT_HEADER
  let new_header = BitArrayScalar(0xFFFFFFFFFFFFFEEE'u64)
  var bitarray_d = create_bitarray(100000, header = new_header)
  doAssert bitarray_d.get_header() == new_header

  # Test that header enforcement works
  try:
    discard create_bitarray("/tmp/ba.mmap", header=BitArrayScalar(0))
    doAssert false
  except BitArrayError:
    doAssert true

  # Test that bit arrays < sizeof(BitArrayScalar) fail
  try:
    var m = create_bitarray(8)
    discard m
    doAssert false
  except BitArrayError:
    doAssert true
  var bitarray_64 = create_bitarray(64)
  doAssert bitarray_64.size_bits == 64

  # Test clearing bits
  var bitarray_e = create_bitarray(64)
  bitarray_e[1] = true
  bitarray_e[2] = true
  bitarray_e[2] = false
  doAssert bitarray_e[1] == true
  doAssert bitarray_e[2] == false

  echo("All tests successfully completed.")

-------------------------------------------------------------------------------- /bitarray/bloom_filter_demo.nim: -------------------------------------------------------------------------------- 1 | # This is a demonstration script 2 | # for the bitarray and also requires 3 | # murmur3 >= 0.1.2. As that is 4 | # not a requirement of the main BitArray 5 | # type, it is not installed automatically 6 | # by Babel / on install of this module.
import bitarray
import murmur3
import strutils
import times
from random import rand, randomize

type
  BloomFilter = object
    bitarray: BitArray
    n_hashes: int
    n_bits_per_item: int
    n_bits: int             # number of bits actually addressable

proc create_bloom_filter*(n_elements: int, n_bits_per_item: int = 12, n_hashes: int = 6): BloomFilter =
  ## Generate a Bloom filter, nice and simple!
  ## Allocates `n_elements * n_bits_per_item` bits; `n_hashes` bits are
  ## set per inserted item.
  let ba = create_bitarray(n_elements * n_bits_per_item)
  result = BloomFilter(
    bitarray: ba,
    n_hashes: n_hashes,
    n_bits_per_item: n_bits_per_item,
    # Use the size the bitarray really has (it rounds to whole elements),
    # so every hashed position is a valid index.
    n_bits: ba.size_bits
  )

{.push overflowChecks: off.}
proc hash(bf: BloomFilter, item: string): seq[int] =
  ## Derives `n_hashes` bit positions in `0 ..< n_bits` from two murmur
  ## hashes (h0 + i * h1).
  let hashes: MurmurHashes = murmur_hash(item, 0)
  newSeq(result, bf.n_hashes)
  for i in 0..(bf.n_hashes - 1):
    let combined = hashes[0] + hashes[1] * i
    # Reduce as an unsigned value: `abs` of the smallest integer is still
    # negative when overflow checks are off and would give a negative index.
    result[i] = int(cast[uint64](int64(combined)) mod uint64(bf.n_bits))
  return result
{.pop.}

proc insert*(bf: var BloomFilter, item: string) =
  ## Put the string there
  let hashes = hash(bf, item)
  for h in hashes:
    bf.bitarray[h] = true

proc lookup*(bf: var BloomFilter, item: string): bool =
  ## Is the string there? (May report false positives, never false
  ## negatives.)
  for h in hash(bf, item):
    if not bf.bitarray[h]:
      return false  # one missing bit is enough
  return true


when isMainModule:
  echo "Quick working Bloom filter example."
  let n_tests = int(2e7)
  var bf = create_bloom_filter(n_elements = n_tests, n_bits_per_item = 12, n_hashes = 7)
  bf.insert("Here we go!")
  assert bf.lookup("Here we go!")
  assert (not bf.lookup("I'm not here."))

  let test_string_len = 50
  let sample_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
  var k_test_elements = newSeq[string](n_tests)
  for i in 0..(n_tests - 1):
    var new_string = newStringOfCap(test_string_len)
    for j in 0 ..< test_string_len:  # exactly test_string_len characters
      # `rand` is inclusive: draw from the whole alphabet, digits included
      new_string.add(sample_chars[rand(sample_chars.high)])
    k_test_elements[i] = new_string

  let start_time = cpuTime()
  for i in 0..(n_tests - 1):
    bf.insert(k_test_elements[i])
  let end_time = cpuTime()
  echo("Took ", formatFloat(end_time - start_time, format = ffDecimal, precision = 4), " seconds to insert ", n_tests, " items.")

  let start_time_b = cpuTime()
  for i in 0..(n_tests - 1):
    doAssert bf.lookup(k_test_elements[i])
  let end_time_b = cpuTime()
  echo("Took ", formatFloat(end_time_b - start_time_b, format = ffDecimal, precision = 4), " seconds to lookup ", n_tests, " items.")

-------------------------------------------------------------------------------- /bitarray/bloom_filter_varying_bit_demo.nim: -------------------------------------------------------------------------------- 1 | # This is a demonstration script 2 | # for the bitarray and also requires 3 | # murmur3 >= 0.1.2. As that is 4 | # not a requirement of the main BitArray 5 | # type, it is not installed automatically 6 | # by Babel / on install of this module. 7 | # 8 | # Specifically, this implements a Bloom filter 9 | # where the bit flipped when inserting `x` 10 | # is not determined by `hash(x)`, but instead 11 | # by some deterministic function `f(hash(x))`.
# This is solely to demo the `[]` and `[]=`
# procs for BitArrays using a Slice
import bitarray
import murmur3
import strutils
import times
from random import rand, randomize


# Width of the window read/written per hash: one full storage element.
const SLICE_BITS = sizeof(BitArrayScalar) * 8


type
  BloomFilter = object
    bitarray: BitArray
    n_hashes: int
    n_bits_per_item: int
    n_bits: int             # number of bits actually addressable

proc create_bloom_filter*(n_elements: int, n_bits_per_item: int = 12, n_hashes: int = 6): BloomFilter =
  ## Generate a Bloom filter, same as bloom_filter_demo.nim
  let ba = create_bitarray(n_elements * n_bits_per_item)
  result = BloomFilter(
    bitarray: ba,
    n_hashes: n_hashes,
    n_bits_per_item: n_bits_per_item,
    # Use the size the bitarray really has (it rounds to whole elements),
    # so every hashed window lies inside the array.
    n_bits: ba.size_bits
  )

{.push overflowChecks: off.}
proc hash(bf: BloomFilter, item: string): seq[int] =
  ## Derives `n_hashes` window start positions from two murmur hashes.
  ## Every start `h` satisfies `h + SLICE_BITS - 1 < n_bits`.
  let hashes: MurmurHashes = murmur_hash(item, 0)
  # Number of valid start positions; at least 1 because the bitarray
  # never holds fewer than SLICE_BITS bits.
  let n_starts = uint64(bf.n_bits - SLICE_BITS + 1)
  newSeq(result, bf.n_hashes)
  for i in 0..(bf.n_hashes - 1):
    let combined = hashes[0] + hashes[1] * i
    # Reduce as an unsigned value: `abs` of the smallest integer is still
    # negative when overflow checks are off and would give a negative index.
    result[i] = int(cast[uint64](int64(combined)) mod n_starts)
  return result
{.pop.}

proc insert*(bf: var BloomFilter, item: string) =
  ## Put the string there: ORs a one-bit pattern (chosen by the first
  ## hash) into a SLICE_BITS wide window at every hashed position.
  let hashes = hash(bf, item)
  let insert_pattern: BitArrayScalar = BitArrayScalar(1) shl BitArrayScalar(hashes[0] mod SLICE_BITS)
  for h in hashes:
    # Inclusive slice of exactly SLICE_BITS bits
    bf.bitarray[h..(h + SLICE_BITS - 1)] = insert_pattern

proc lookup*(bf: var BloomFilter, item: string): bool =
  ## Is the string there? True when the pattern bit is set in the window
  ## at every hashed position.
  let hashes = hash(bf, item)
  let pattern = BitArrayScalar(1) shl BitArrayScalar(hashes[0] mod SLICE_BITS)
  var lookup: BitArrayScalar = pattern
  for h in hashes:
    lookup = lookup and bf.bitarray[h..(h + SLICE_BITS - 1)]
  result = (lookup == pattern)


when isMainModule:
  echo "Quick working Bloom filter example."
  let n_tests = int(2e7)
  var bf = create_bloom_filter(n_elements = n_tests, n_bits_per_item = 16, n_hashes = 7)
  bf.insert("Here we go!")
  assert bf.lookup("Here we go!")
  assert (not bf.lookup("I'm not here."))

  let test_string_len = 50
  let sample_chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
  var k_test_elements = newSeq[string](n_tests)
  for i in 0..(n_tests - 1):
    var new_string = newStringOfCap(test_string_len)
    for j in 0 ..< test_string_len:  # exactly test_string_len characters
      # `rand` is inclusive: draw from the whole alphabet, digits included
      new_string.add(sample_chars[rand(sample_chars.high)])
    k_test_elements[i] = new_string

  let start_time = cpuTime()
  for i in 0..(n_tests - 1):
    bf.insert(k_test_elements[i])
  let end_time = cpuTime()
  echo("Took ", formatFloat(end_time - start_time, format = ffDecimal, precision = 4), " seconds to insert ", n_tests, " items.")

  let start_time_b = cpuTime()
  for i in 0..(n_tests - 1):
    doAssert bf.lookup(k_test_elements[i])
  let end_time_b = cpuTime()
  echo("Took ", formatFloat(end_time_b - start_time_b, format = ffDecimal, precision = 4), " seconds to lookup ", n_tests, " items.")

-------------------------------------------------------------------------------- /circle.yml: -------------------------------------------------------------------------------- 1 | dependencies: 2 | cache_directories: 3 | - Nim 4 | - nimble 5 | - ~/.nimble 6 | 7 | pre: 8 | - if [[ ! -e Nim/bin/nim ]]; then git clone -b master git://github.com/nim-lang/Nim.git && cd Nim && git clone -b master --depth 1 git://github.com/nim-lang/csources && cd csources && sh build.sh && cd ../ && ./bin/nim c koch && ./koch boot -d:release; fi 9 | - ln -s `readlink -f Nim/bin/nim` /home/ubuntu/bin/nim 10 | - if [[ ! -e ~/.nimble/bin/nimble ]]; then if [[ !
-e nimble ]]; then git clone https://github.com/nim-lang/nimble.git; fi; cd nimble && nim c -r src/nimble install -y; fi 11 | - ln -s `readlink -f ~/.nimble/bin/nimble` /home/ubuntu/bin/nimble 12 | - nimble update 13 | - nimble install -y 14 | 15 | test: 16 | pre: 17 | - nim c bitarray/bitarray.nim 18 | 19 | override: 20 | - ./bitarray/bitarray 21 | --------------------------------------------------------------------------------