├── test ├── test_helper.exs ├── Bitarray │ └── bitarray_test.exs └── bloomex_test.exs ├── .github ├── FUNDING.yml ├── dependabot.yml └── workflows │ └── ci.yml ├── .formatter.exs ├── .gitignore ├── lib ├── bitarray │ └── bitarray.ex └── bloomex.ex ├── LICENSE ├── mix.exs └── README.md /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | ko_fi: gmcabrita 2 | github: gmcabrita 3 | -------------------------------------------------------------------------------- /.formatter.exs: -------------------------------------------------------------------------------- 1 | [ 2 | inputs: ["mix.exs", "{config,lib,test}/**/*.{ex,exs}"] 3 | ] 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /_build 2 | /deps 3 | erl_crash.dump 4 | *.ez 5 | /docs/ 6 | .elixir_ls 7 | doc/ 8 | mix.lock -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: mix 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | reviewers: 9 | - gmcabrita 10 | assignees: 11 | - gmcabrita 12 | -------------------------------------------------------------------------------- /test/Bitarray/bitarray_test.exs: -------------------------------------------------------------------------------- 1 | defmodule BitarrayTest do 2 | use ExUnit.Case 3 | 4 | test "bitarray creation" do 5 | ba = Bloomex.BitArray.new(1) 6 | assert :array.is_array(ba) 7 | end 8 | 9 | test "bitarray set and get" do 10 | ba = 
Bloomex.BitArray.new(1009) 11 | ba = Bloomex.BitArray.set(ba, 1024) 12 | assert Bloomex.BitArray.get(ba, 1024) == true 13 | end 14 | 15 | test "bitarray value not set" do 16 | ba = Bloomex.BitArray.new(1) 17 | assert Bloomex.BitArray.get(ba, 5) == false 18 | end 19 | end 20 | -------------------------------------------------------------------------------- /lib/bitarray/bitarray.ex: --------------------------------------------------------------------------------
defmodule Bloomex.BitArray do
  @moduledoc """
  This module implements a bit array using Erlang's `:array` module.

  Bits are packed into integer words: bit `i` is stored in array entry
  `div(i, 24)` at bit position `rem(i, 24)`.
  """

  # `import` instead of `use`: `use Bitwise` is deprecated in recent Elixir
  # releases in favour of a plain import.
  import Bitwise

  @type t :: :array.array()

  # Number of bits stored in each `:array` entry.
  @w 24

  @doc """
  Returns a new bitarray of size `n`.

  Every entry defaults to `0`, so all bits start unset. The number of entries
  is `n` divided by the word width, rounded up.
  """
  @spec new(pos_integer) :: t
  def new(n) do
    :array.new(div(n - 1, @w) + 1, {:default, 0})
  end

  @doc """
  Returns an updated bitarray where the `i`th bit is set.
  """
  @spec set(t, non_neg_integer) :: t
  def set(a, i) do
    word = div(i, @w)
    :array.set(word, :array.get(word, a) ||| bit_mask(i), a)
  end

  @doc """
  Returns `true` if the bitarray has the `i`th bit set,
  otherwise returns `false`.
  """
  @spec get(t, non_neg_integer) :: boolean
  def get(a, i) do
    (:array.get(div(i, @w), a) &&& bit_mask(i)) !== 0
  end

  # Single-bit mask selecting bit `i` inside its word.
  defp bit_mask(i), do: 1 <<< rem(i, @w)
end
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014 Gonçalo Cabrita 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | env: 3 | MIX_ENV: test 4 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 5 | on: [push, pull_request] 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v2 11 | with: 12 | fetch-depth: 1 13 | - uses: erlef/setup-beam@v1 14 | with: 15 | otp-version: 22.3 16 | elixir-version: 1.10.2 17 | - run: mix local.hex --force 18 | - run: mix local.rebar --force 19 | - uses: actions/cache@v1 20 | with: 21 | path: deps 22 | key: ${{ runner.os }}-deps-${{ github.ref }}--${{ hashFiles('**/mix.lock') }} 23 | restore-keys: ${{ runner.os }}-deps- 24 | - uses: actions/cache@v1 25 | with: 26 | path: _build 27 | key: ${{ runner.os }}-build-${{ github.ref }} 28 | restore-keys: ${{ runner.os }}-build- 29 | - uses: actions/cache@v1 30 | with: 31 | path: ~/.cache/dialyzer 32 | key: ${{ runner.os }}-dialyzer-${{ github.ref }} 33 | restore-keys: ${{ runner.os }}-dialyzer- 34 | - run: mix do deps.get, compile 35 | - run: mix ci 36 | - run: mix coveralls.github 37 | -------------------------------------------------------------------------------- /mix.exs: --------------------------------------------------------------------------------
# Mix project definition for the Bloomex library: package metadata,
# dependencies, documentation settings and the `ci` task alias.
defmodule Bloomex.Mixfile do
  use Mix.Project

  # Package description, reused as the `:description` project option.
  @description """
  Bloomex is a pure Elixir implementation of Scalable Bloom Filters.
  """
  # Repository URL, used for `:source_url` and the package links.
  @github "https://github.com/gmcabrita/bloomex"

  # Project configuration read by Mix.
  def project() do
    [
      app: :bloomex,
      name: "Bloomex",
      source_url: @github,
      homepage_url: nil,
      version: "1.2.0",
      elixir: "~> 1.6",
      build_embedded: Mix.env() == :prod,
      start_permanent: Mix.env() == :prod,
      description: @description,
      package: package(),
      deps: deps(),
      aliases: aliases(),
      # Run the `ci` alias in the :test environment.
      preferred_cli_env: [
        ci: :test
      ],
      test_coverage: [tool: ExCoveralls],
      docs: docs(),
      # NOTE(review): presumably consumed by the dialyzex dependency — confirm.
      dialyzer_ignored_warnings: [
        {:warn_contract_supertype, :_, {:extra_range, [:_, :__protocol__, 1, :_, :_]}}
      ]
    ]
  end

  # No application options are configured.
  def application() do
    []
  end

  # Documentation settings: the README is the landing page and the only extra.
  defp docs() do
    [
      main: "readme",
      logo: nil,
      extras: ["README.md"]
    ]
  end

  # `jason` is the only dependency without an `only:` restriction; the others
  # are limited to dev/test/docs environments and marked `runtime: false`.
  defp deps() do
    [
      {:excoveralls, "~> 0.10", only: [:dev, :test], runtime: false},
      {:ex_doc, "~> 0.16", only: [:dev, :docs], runtime: false},
      {:dialyzex, "~> 1.3.0", only: [:dev, :test], runtime: false},
      {:jason, "~> 1.1"}
    ]
  end

  # Package metadata: files to ship, maintainers, license and links.
  defp package() do
    [
      files: ["lib", "mix.exs", "README.md", "LICENSE"],
      maintainers: ["Gonçalo Cabrita"],
      licenses: ["MIT"],
      links: %{"GitHub" => @github}
    ]
  end

  # `mix ci` runs, in order: the formatter check, the test suite and dialyzer.
  defp aliases do
    [
      ci: [
        "format --check-formatted",
        "test",
        "dialyzer"
      ]
    ]
  end
end
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Bloomex 2 | ======= 3 | 4 | [![Build Status](https://img.shields.io/github/workflow/status/gmcabrita/bloomex/CI/master.svg)](https://github.com/gmcabrita/bloomex/actions) 5 | [![Coverage Status](https://img.shields.io/coveralls/gmcabrita/bloomex.svg?style=flat)](https://coveralls.io/r/gmcabrita/bloomex?branch=master) 6 | [![Hex
docs](http://img.shields.io/badge/hex.pm-docs-green.svg?style=flat)](https://hexdocs.pm/bloomex) 7 | [![Hex Version](http://img.shields.io/hexpm/v/bloomex.svg?style=flat)](https://hex.pm/packages/bloomex) 8 | [![License](http://img.shields.io/hexpm/l/bloomex.svg?style=flat)](https://github.com/gmcabrita/bloomex/blob/master/LICENSE) 9 | 10 | Bloomex is a pure Elixir implementation of [Scalable Bloom Filters](http://haslab.uminho.pt/cbm/files/dbloom.pdf). 11 | 12 | # Usage 13 | 14 | Add Bloomex as a dependency in your mix.exs file. 15 | 16 | ```elixir 17 | def deps do 18 | [{:bloomex, "~> 1.0"}] 19 | end 20 | ``` 21 | 22 | When you are done, run `mix deps.get` in your shell to fetch and compile Bloomex. 23 | 24 | # Examples 25 | 26 | ```iex 27 | iex> bf = Bloomex.scalable(1000, 0.1, 0.1, 2) 28 | %Bloomex.ScalableBloom... 29 | 30 | iex> bf = Bloomex.add(bf, 5) 31 | %Bloomex.ScalableBloom... 32 | 33 | iex> Bloomex.member?(bf, 5) 34 | true 35 | 36 | iex> bf = Bloomex.add(bf, 100) 37 | %Bloomex.ScalableBloom... 38 | 39 | iex> Bloomex.member?(bf, 100) 40 | true 41 | 42 | iex> Bloomex.member?(bf, 105) 43 | false 44 | ``` 45 | 46 | You can also pass in a hashing function to be used by the Bloom filter when creating one. 47 | 48 | (assuming we have [Murmur](https://hex.pm/packages/murmur/) installed as a dependency) 49 | 50 | ```iex 51 | iex> bf = Bloomex.scalable(1000, 0.1, 0.1, 2, &Murmur.hash_x86_128/1) 52 | %Bloomex.ScalableBloom... 53 | 54 | iex> bf = Bloomex.add(bf, 5) 55 | %Bloomex.ScalableBloom... 56 | 57 | iex> Bloomex.member?(bf, 5) 58 | true 59 | 60 | iex> bf = Bloomex.add(bf, 100) 61 | %Bloomex.ScalableBloom... 
62 | 63 | iex> Bloomex.member?(bf, 100) 64 | true 65 | `````` 66 | -------------------------------------------------------------------------------- /test/bloomex_test.exs: -------------------------------------------------------------------------------- 1 | defmodule BloomexTest do 2 | use ExUnit.Case 3 | 4 | test "scalable add, check membership and size" do 5 | bloom = Bloomex.scalable(1000, 0.01, 0.25, 2) 6 | assert Bloomex.member?(bloom, 100) == false 7 | 8 | bloom = Bloomex.add(bloom, 100) 9 | assert Bloomex.member?(bloom, 100) == true 10 | 11 | assert Bloomex.size(bloom) == 1 12 | end 13 | 14 | test "plain check membership, check capacity and size" do 15 | bloom = Bloomex.plain(50, 0.10) 16 | assert Bloomex.member?(bloom, 5) == false 17 | 18 | assert Bloomex.capacity(bloom) == 52 19 | assert Bloomex.size(bloom) == 0 20 | 21 | bloom = Bloomex.add(bloom, 2) |> Bloomex.add(3) |> Bloomex.add(2) |> Bloomex.add(10) 22 | assert Bloomex.member?(bloom, 10) == true 23 | end 24 | 25 | test "plain add" do 26 | bloom = Bloomex.plain(1_000_000, 0.001) |> Bloomex.add(5) 27 | assert Bloomex.member?(bloom, 5) == true 28 | end 29 | 30 | test "scalable with lots of additions" do 31 | bloom = Bloomex.scalable(6000, 0.001, 0.001, 3) 32 | 33 | bloom = Enum.reduce(1..10000, bloom, fn x, acc -> Bloomex.add(acc, x) end) 34 | assert Bloomex.size(bloom) == 9998 35 | end 36 | 37 | test "scalable force mb to be bigger than 16" do 38 | bloom = Bloomex.scalable(100, 0.1, 0.1, 3) 39 | 40 | bloom = Enum.reduce(1..90000, bloom, fn x, acc -> Bloomex.add(acc, x) end) 41 | assert Bloomex.member?(bloom, 1) == true 42 | assert Bloomex.size(bloom) == 80253 43 | end 44 | 45 | test "scalable serialization" do 46 | bloom = 47 | Bloomex.scalable(1000, 0.01, 0.25, 2) 48 | |> Bloomex.add(1000) 49 | 50 | deserialized_bloom = 51 | Bloomex.serialize(bloom) 52 | |> Bloomex.deserialize() 53 | 54 | assert Bloomex.member?(deserialized_bloom, 1000) 55 | end 56 | 57 | test "plain serialization" do 58 | bloom = 59 | 
Bloomex.plain(1000, 0.10) 60 | |> Bloomex.add(1000) 61 | 62 | deserialized_bloom = 63 | Bloomex.serialize(bloom) 64 | |> Bloomex.deserialize() 65 | 66 | assert Bloomex.member?(deserialized_bloom, 1000) 67 | end 68 | end 69 | -------------------------------------------------------------------------------- /lib/bloomex.ex: --------------------------------------------------------------------------------
defmodule Bloomex do
  @moduledoc """
  This module implements a [Scalable Bloom Filter](http://haslab.uminho.pt/cbm/files/dbloom.pdf).

  ## Examples

      iex> bf = Bloomex.scalable(1000, 0.1, 0.1, 2)
      %Bloomex.ScalableBloom...

      iex> bf = Bloomex.add(bf, 5)
      %Bloomex.ScalableBloom...

      iex> Bloomex.member?(bf, 5)
      true

      iex> bf = Bloomex.add(bf, 100)
      %Bloomex.ScalableBloom...

      iex> Bloomex.member?(bf, 100)
      true

      iex> Bloomex.member?(bf, 105)
      false

      iex> Bloomex.member?(bf, 101) # false positive
      true

  """

  # `import` instead of `use`: `use Bitwise` is deprecated in recent Elixir
  # releases in favour of a plain import.
  import Bitwise

  @type t :: Bloomex.Bloom.t() | Bloomex.ScalableBloom.t()
  @type mode :: :bits | :size
  @type hash_func :: (term -> non_neg_integer)

  # What `make_hashes/3` produces: one hash value for slices of up to 2^16
  # bits, a pair of hash values for larger slices.
  @typep hashes :: non_neg_integer | {non_neg_integer, non_neg_integer}

  defmodule Bloom do
    @moduledoc """
    A plain bloom filter.

    * :error_prob - error probability
    * :max - maximum number of elements
    * :mb - 2^mb = m, the size of each slice (bitvector)
    * :size - number of elements
    * :bv - list of bitvectors
    * :hash_func - hash function to use
    """
    defstruct [
      :error_prob,
      :max,
      :mb,
      :size,
      :bv,
      :hash_func
    ]

    @type t :: %__MODULE__{
            error_prob: number,
            max: integer,
            mb: integer,
            size: integer,
            bv: [Bloomex.BitArray.t()],
            hash_func: Bloomex.hash_func()
          }
  end

  defmodule ScalableBloom do
    @moduledoc """
    A scalable bloom filter.

    * :error_prob - error probability
    * :error_prob_ratio - error probability ratio
    * :growth - log 2 of growth ratio
    * :size - number of elements
    * :b - list of plain bloom filters
    * :hash_func - hash function to use
    """
    defstruct [
      :error_prob,
      :error_prob_ratio,
      :growth,
      :size,
      :b,
      :hash_func
    ]

    @type t :: %__MODULE__{
            error_prob: number,
            error_prob_ratio: number,
            growth: integer,
            size: integer,
            b: [Bloomex.Bloom.t()],
            hash_func: Bloomex.hash_func()
          }
  end

  @doc """
  Returns a scalable Bloom filter based on the provided arguments:
  * `capacity`, the initial capacity before expanding
  * `error`, the error probability
  * `error_ratio`, the error probability ratio
  * `growth`, the growth ratio when full
  * `hash_func`, a hashing function

  If a hash function is not provided then `:erlang.phash2/2` will be used with
  the maximum range possible `(2^32)`.

  Restrictions:
  * `capacity` must be a positive integer
  * `error` must be a float between `0` and `1`
  * `error_ratio` must be a float between `0` and `1`
  * `growth` must be a positive integer between `1` and `3`
  * `hash_func` must be a function of type `term -> non_neg_integer`

  The function follows a rule of thumb due to double hashing where
  `capacity >= 4 / (error * (1 - error_ratio))` must hold true.

  Arguments that violate these restrictions match no clause and raise a
  `FunctionClauseError`.
  """
  @spec scalable(integer, number, number, 1 | 2 | 3, hash_func()) :: ScalableBloom.t()
  def scalable(capacity, error, error_ratio, growth, hash_func \\ &default_hash/1)
      when capacity > 0 and error > 0 and error < 1 and growth in [1, 2, 3] and
             error_ratio > 0 and error_ratio < 1 and
             capacity >= 4 / (error * (1 - error_ratio)) do
    %ScalableBloom{
      error_prob: error,
      error_prob_ratio: error_ratio,
      growth: growth,
      size: 0,
      # The first plain filter is built with the tightened error
      # `error * (1 - error_ratio)`.
      b: [plain(capacity, error * (1 - error_ratio), hash_func)],
      hash_func: hash_func
    }
  end

  @doc """
  Returns a plain Bloom filter based on the provided arguments:
  * `capacity`, used to calculate the size of each bitvector slice
  * `error`, the error probability
  * `hash_func`, a hashing function

  If a hash function is not provided then `:erlang.phash2/2` will be used with
  the maximum range possible `(2^32)`.

  Restrictions:
  * `capacity` must be a positive integer
  * `error` must be a float between `0` and `1`
  * `hash_func` must be a function of type `term -> non_neg_integer`

  The function follows a rule of thumb due to double hashing where
  `capacity >= 4 / error` must hold true.

  Arguments that violate these restrictions match no clause and raise a
  `FunctionClauseError`.
  """
  @spec plain(integer, float, hash_func()) :: Bloom.t()
  def plain(capacity, error, hash_func \\ &default_hash/1)
      when is_float(error) and capacity > 0 and error > 0 and error < 1 and
             capacity >= 4 / error do
    plain(:size, capacity, error, hash_func)
  end

  # Builds the plain filter. In `:size` mode `capacity` is an element count
  # from which the slice exponent `mb` is derived; in `:bits` mode `capacity`
  # is already `mb` (used when a scalable filter grows).
  @spec plain(mode(), integer, number, hash_func()) :: Bloom.t()
  defp plain(mode, capacity, e, hash_func) do
    # k is the number of slices (one bit array each); p is the per-slice
    # error such that p^k = e.
    k = 1 + trunc(log2(1 / e))
    p = :math.pow(e, 1 / k)

    mb =
      case mode do
        :size -> 1 + trunc(-log2(1 - :math.pow(1 - p, 1 / capacity)))
        :bits -> capacity
      end

    m = 1 <<< mb
    # Stored as `:max`: once a filter holds this many elements, a scalable
    # filter stops adding to it and grows instead (see `add/2`).
    n = trunc(:math.log(1 - p) / :math.log(1 - 1 / m))

    %Bloom{
      error_prob: e,
      max: n,
      mb: mb,
      size: 0,
      bv: for(_ <- 1..k, do: Bloomex.BitArray.new(m)),
      hash_func: hash_func
    }
  end

  # Default hash function: `:erlang.phash2/2` over its widest range, 2^32.
  defp default_hash(term), do: :erlang.phash2(term, 1 <<< 32)

  @doc """
  Returns the number of elements currently in the bloom filter.
  """
  @spec size(t) :: non_neg_integer
  def size(%Bloom{size: size}), do: size
  def size(%ScalableBloom{size: size}), do: size

  @doc """
  Returns the capacity of the bloom filter.

  A plain bloom filter will always have a fixed capacity, while a scalable one
  will always have a theoretically infinite capacity.
  """
  @spec capacity(Bloomex.t()) :: pos_integer | :infinity
  def capacity(%Bloom{max: n}), do: n
  def capacity(%ScalableBloom{}), do: :infinity

  @doc """
  Returns `true` if the element `e` exists in the bloom filter, otherwise returns `false`.

  Keep in mind that you may get false positives, but never false negatives.
  """
  @spec member?(Bloomex.t(), any) :: boolean
  def member?(%Bloom{mb: mb, hash_func: hash_func} = bloom, e) do
    hashes = make_hashes(mb, e, hash_func)
    hash_member(hashes, bloom)
  end

  # For a scalable filter the hashes are computed once, using the newest
  # (head) filter's slice size, and then checked against every inner filter.
  def member?(%ScalableBloom{b: [%Bloom{mb: mb, hash_func: hash_func} | _]} = bloom, e) do
    hashes = make_hashes(mb, e, hash_func)
    hash_member(hashes, bloom)
  end

  # Encodes tuples as JSON lists so the `:array` records stored in `:bv` can
  # be serialised.
  # NOTE(review): this implements a protocol for a built-in type, so it applies
  # to every tuple encoded with Jason in any application that loads this
  # library, and can clash with another `Jason.Encoder` implementation for
  # `Tuple`. Kept as-is because `serialise/1` depends on it.
  defimpl Jason.Encoder, for: Tuple do
    def encode(data, options) when is_tuple(data) do
      data
      |> Tuple.to_list()
      |> Jason.Encoder.List.encode(options)
    end
  end

  @doc """
  Alternative spelling of `serialise/1`.
  """
  defdelegate serialize(bloom), to: __MODULE__, as: :serialise

  @doc """
  Serialises the bloom filter to a JSON binary.

  The hash function is not part of the output; it has to be supplied again
  when calling `deserialise/2`.
  """
  @spec serialise(Bloomex.t()) :: binary
  def serialise(%ScalableBloom{b: b} = bloom) do
    b =
      Enum.map(b, fn bloom ->
        bloom |> Map.from_struct() |> Map.delete(:hash_func)
      end)

    bloom |> Map.from_struct() |> Map.delete(:hash_func) |> Map.put(:b, b) |> Jason.encode!()
  end

  def serialise(%Bloom{} = bloom) do
    bloom |> Map.from_struct() |> Map.delete(:hash_func) |> Jason.encode!()
  end

  @doc """
  Alternative spelling of `deserialise/2`.
  """
  defdelegate deserialize(bloom, func \\ &default_hash/1),
    to: __MODULE__,
    as: :deserialise

  @doc """
  Rebuilds a bloom filter from the JSON produced by `serialise/1`.

  `func` becomes the filter's hash function. It should be the same function the
  filter was built with, because `member?/2` and `add/2` recompute hashes with
  it. Defaults to `:erlang.phash2/2` with range `2^32`.
  """
  @spec deserialise(iodata, hash_func()) :: Bloomex.t()
  def deserialise(bloom, func \\ &default_hash/1) do
    bloom |> Jason.decode!() |> _deserialise(func)
  end

  # Internal helper of `deserialise/2`; it stays public (`def`) only so that
  # existing callers keep working.
  @doc false
  def _deserialise(
        %{
          "b" => b,
          "error_prob" => error_prob,
          "error_prob_ratio" => error_prob_ratio,
          "growth" => growth,
          "size" => size
        },
        func
      ) do
    b = Enum.map(b, fn bloom -> _deserialise(bloom, func) end)

    %Bloomex.ScalableBloom{
      b: b,
      error_prob: error_prob,
      error_prob_ratio: error_prob_ratio,
      growth: growth,
      size: size,
      hash_func: func
    }
  end

  def _deserialise(
        %{"bv" => bv, "error_prob" => error_prob, "max" => max, "mb" => mb, "size" => size},
        func
      ) do
    bv = Enum.map(bv, &get_tuple/1)

    %Bloomex.Bloom{
      bv: bv,
      error_prob: error_prob,
      max: max,
      mb: mb,
      size: size,
      hash_func: func
    }
  end

  # Reverses the tuple-to-list encoding: decoded lists become tuples again
  # (recursively) and the string "array" becomes the atom `:array`.
  defp get_tuple(element) when is_list(element) do
    element
    |> Enum.map(&get_tuple/1)
    |> List.to_tuple()
  end

  defp get_tuple("array"), do: :array

  defp get_tuple(element), do: element

  # Checks whether every slice has the bit for these hashes set.
  @spec hash_member(hashes, Bloomex.t()) :: boolean
  defp hash_member(hashes, %Bloom{mb: mb, bv: bv}) do
    mask = (1 <<< mb) - 1
    {i1, i0} = make_indexes(mask, hashes)

    all_set(mask, i1, i0, bv)
  end

  defp hash_member(hashes, %ScalableBloom{b: b}) do
    Enum.any?(b, &hash_member(hashes, &1))
  end

  # Slices of up to 2^16 bits use one hash value; up to 2^32 bits use two,
  # obtained by hashing the element wrapped in a tuple and in a list.
  # There is no clause for `mb > 32`, so such a filter raises
  # `FunctionClauseError`.
  @spec make_hashes(pos_integer, any, hash_func()) :: hashes
  defp make_hashes(mb, e, hash_func) when mb <= 16 do
    hash_func.({e})
  end

  defp make_hashes(mb, e, hash_func) when mb <= 32 do
    {hash_func.({e}), hash_func.([e])}
  end

  # Turns hashes into the `{stride, start}` pair used to walk the slices.
  # A hash pair is used directly only for large masks; otherwise the first
  # hash is split into its part above bit 16 and its low part. This keeps the
  # older, smaller filters of a scalable filter addressable with hashes that
  # were computed for a larger head filter.
  @spec make_indexes(pos_integer, hashes) :: {non_neg_integer, non_neg_integer}
  defp make_indexes(mask, {h0, h1}) when mask > 1 <<< 16 do
    masked_pair(mask, h0, h1)
  end

  defp make_indexes(mask, {h0, _}) do
    make_indexes(mask, h0)
  end

  defp make_indexes(mask, h0) do
    masked_pair(mask, h0 >>> 16, h0)
  end

  @spec masked_pair(pos_integer, non_neg_integer, non_neg_integer) ::
          {non_neg_integer, non_neg_integer}
  defp masked_pair(mask, x, y), do: {x &&& mask, y &&& mask}

  # Walks the slices, checking index `i` in the current slice and advancing by
  # `i1` (wrapped with `mask`) for the next one.
  @spec all_set(pos_integer, non_neg_integer, non_neg_integer, [Bloomex.BitArray.t()]) ::
          boolean
  defp all_set(_, _, _, []), do: true

  defp all_set(mask, i1, i, [h | t]) do
    if Bloomex.BitArray.get(h, i) do
      all_set(mask, i1, i + i1 &&& mask, t)
    else
      false
    end
  end

  @doc """
  Returns a bloom filter with the element `e` added.

  Adding an element that the filter already reports as a member returns the
  filter unchanged. A scalable filter whose newest inner filter is full
  prepends a new, larger inner filter and adds the element there.
  """
  @spec add(Bloomex.t(), any) :: Bloomex.t()
  def add(%Bloom{mb: mb, hash_func: hash_func} = bloom, e) do
    hashes = make_hashes(mb, e, hash_func)
    hash_add(hashes, bloom)
  end

  def add(%ScalableBloom{error_prob_ratio: r, size: size, growth: g, b: [h | t] = bs} = bloom, e) do
    %Bloom{mb: mb, error_prob: err, max: n, size: head_size, hash_func: hash_func} = h
    hashes = make_hashes(mb, e, hash_func)

    cond do
      # Already reported as present by one of the inner filters.
      hash_member(hashes, bloom) ->
        bloom

      # The newest filter still has room.
      head_size < n ->
        %{bloom | size: size + 1, b: [hash_add(hashes, h) | t]}

      # The newest filter is full: grow by `g` bits of slice size and tighten
      # the error by the ratio `r`.
      true ->
        b = :bits |> plain(mb + g, err * r, hash_func) |> add(e)
        %{bloom | size: size + 1, b: [b | bs]}
    end
  end

  # Sets the element's bit in every slice, unless all of them are already set.
  @spec hash_add(hashes, Bloom.t()) :: Bloom.t()
  defp hash_add(hashes, %Bloom{mb: mb, bv: bv, size: size} = b) do
    mask = (1 <<< mb) - 1
    {i1, i0} = make_indexes(mask, hashes)

    if all_set(mask, i1, i0, bv) do
      b
    else
      %{b | size: size + 1, bv: set_bits(mask, i1, i0, bv, [])}
    end
  end

  # Same walk as `all_set/4`, but setting the bits; the accumulator is built
  # in reverse and flipped at the end to preserve slice order.
  @spec set_bits(
          pos_integer,
          non_neg_integer,
          non_neg_integer,
          [Bloomex.BitArray.t()],
          [Bloomex.BitArray.t()]
        ) :: [Bloomex.BitArray.t()]
  defp set_bits(_, _, _, [], acc), do: Enum.reverse(acc)

  defp set_bits(mask, i1, i, [h | t], acc) do
    set_bits(mask, i1, i + i1 &&& mask, t, [Bloomex.BitArray.set(h, i) | acc])
  end

  @spec log2(float) :: float
  defp log2(x) do
    :math.log(x) / :math.log(2)
  end
end
--------------------------------------------------------------------------------