├── rebar.lock ├── config ├── sys.config └── vm.args ├── priv └── .gitignore ├── examples └── elixir │ └── bloom_example │ ├── rebar.lock │ ├── test │ ├── test_helper.exs │ └── bloom_example_test.exs │ ├── .formatter.exs │ ├── mix.lock │ ├── lib │ └── bloom_example.ex │ ├── mix.exs │ ├── README.md │ └── .gitignore ├── crates └── bloom │ ├── .gitignore │ ├── src │ ├── atoms.rs │ ├── container.rs │ ├── lib.rs │ ├── options.rs │ ├── filter │ │ ├── mod.rs │ │ ├── bloom.rs │ │ └── forgetful.rs │ └── nif.rs │ ├── Cargo.toml │ ├── build.rs │ └── Cargo.lock ├── imgs └── base │ ├── soft │ └── rebar3 │ ├── docker-entry.sh │ └── Dockerfile ├── test ├── spec.spec ├── forgetful_SUITE.erl └── bloom_SUITE.erl ├── .cargo └── config ├── .gitignore ├── docker-compose.yml ├── src ├── erbloom.app.src ├── bloom_nif.erl └── bloom.erl ├── .github └── workflows │ └── main.yml ├── LICENSE ├── docker.mk ├── Makefile ├── README.md ├── doc └── overview.edoc └── rebar.config /rebar.lock: -------------------------------------------------------------------------------- 1 | []. 2 | -------------------------------------------------------------------------------- /config/sys.config: -------------------------------------------------------------------------------- 1 | []. 2 | -------------------------------------------------------------------------------- /priv/.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | -------------------------------------------------------------------------------- /examples/elixir/bloom_example/rebar.lock: -------------------------------------------------------------------------------- 1 | []. 
2 | -------------------------------------------------------------------------------- /crates/bloom/.gitignore: -------------------------------------------------------------------------------- 1 | target/* 2 | .idea/* 3 | libpath 4 | -------------------------------------------------------------------------------- /examples/elixir/bloom_example/test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | -------------------------------------------------------------------------------- /imgs/base/soft/rebar3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vonmo/erbloom/HEAD/imgs/base/soft/rebar3 -------------------------------------------------------------------------------- /imgs/base/docker-entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | 4 | a=0 5 | until [ ! $a -lt 10 ]; do 6 | sleep 1 7 | done 8 | -------------------------------------------------------------------------------- /test/spec.spec: -------------------------------------------------------------------------------- 1 | {alias, common, "."}. 2 | {logdir, "./logs/"}. 3 | 4 | {suites, common, [bloom_SUITE, forgetful_SUITE]}. 
5 | -------------------------------------------------------------------------------- /.cargo/config: -------------------------------------------------------------------------------- 1 | [target.'cfg(target_os = "macos")'] 2 | rustflags = [ 3 | "-C", "link-arg=-undefined", 4 | "-C", "link-arg=dynamic_lookup", 5 | ] 6 | -------------------------------------------------------------------------------- /examples/elixir/bloom_example/.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .eunit 2 | *.o 3 | *.beam 4 | *.plt 5 | erl_crash.dump 6 | ebin/*.beam 7 | .concrete/DEV_MODE 8 | .rebar 9 | _build 10 | ./rebar3 11 | .idea 12 | -------------------------------------------------------------------------------- /examples/elixir/bloom_example/mix.lock: -------------------------------------------------------------------------------- 1 | %{ 2 | "erbloom": {:hex, :erbloom, "2.1.0-rc.2", "e54ebd0e8aaa4bca16730a2cd675f75b321285cc86fca83b857ae23d98911c1a", [:rebar3], [], "hexpm", "7ab852222d0f94af88b46d633bc000df5ecf0f0d23be6dd548b03cf71e03600d"}, 3 | } 4 | -------------------------------------------------------------------------------- /examples/elixir/bloom_example/lib/bloom_example.ex: -------------------------------------------------------------------------------- 1 | defmodule BloomExample do 2 | @moduledoc """ 3 | Documentation for `BloomExample`. 4 | """ 5 | 6 | @doc """ 7 | Hello world. 
8 | 9 | ## Examples 10 | 11 | iex> BloomExample.hello() 12 | :world 13 | 14 | """ 15 | def hello do 16 | :world 17 | end 18 | end 19 | -------------------------------------------------------------------------------- /crates/bloom/src/atoms.rs: -------------------------------------------------------------------------------- 1 | rustler::atoms! { 2 | ok, 3 | error, 4 | vn1, 5 | notfound, 6 | undefined, 7 | binencode, 8 | bindecode, 9 | wrong_filter_type, 10 | bitmap_size, 11 | bloom, 12 | capacity, 13 | fbf, 14 | filter_type, 15 | fp_rate, 16 | items_count, 17 | rotate_at, 18 | } 19 | -------------------------------------------------------------------------------- /config/vm.args: -------------------------------------------------------------------------------- 1 | ## Name of the node 2 | -name dev@127.0.0.1 3 | 4 | ## Cookie for distributed erlang 5 | -setcookie dev 6 | 7 | ## Enable kernel poll and a few async threads 8 | ##+K true 9 | ##+A 5 10 | 11 | ## Increase number of concurrent ports/sockets 12 | ##-env ERL_MAX_PORTS 4096 13 | 14 | ## Tweak GC to run more often 15 | ##-env ERL_FULLSWEEP_AFTER 10 16 | -------------------------------------------------------------------------------- /examples/elixir/bloom_example/test/bloom_example_test.exs: -------------------------------------------------------------------------------- 1 | defmodule BloomExampleTest do 2 | use ExUnit.Case 3 | doctest BloomExample 4 | 5 | test "test bloom lib" do 6 | key = "binkeyfortest" 7 | {:ok, ref} = :bloom.new(10, 80) 8 | :ok = :bloom.set(ref, key) 9 | true = :bloom.check(ref, key) 10 | false = :bloom.check(ref, "unknown_key") 11 | end 12 | end 13 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.2' 2 | services: 3 | test: 4 | container_name: erbloom.test 5 | build: 6 | context: imgs/base 7 | args: 8 | UID: "${UID:?Please export UID}" 9 | 
GID: "${GID:?Please export GID}" 10 | user: "${UID:?Please export UID}:${GID:?Please export GID}" 11 | volumes: 12 | - "./:/project" 13 | tmpfs: 14 | - /run 15 | - /tmp 16 | -------------------------------------------------------------------------------- /crates/bloom/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "bloom" 3 | version = "0.2.0" 4 | authors = ["Maxim Molchanov "] 5 | license = "MIT/Apache-2.0" 6 | build = "build.rs" 7 | 8 | [lib] 9 | name = "bloom" 10 | crate-type = ["dylib"] 11 | 12 | [dependencies] 13 | bincode = "1.2.1" 14 | rustler = "0.22.0-rc.0" 15 | bloomfilter = "1.0.2" 16 | siphasher = "0.3.3" 17 | serde = { version = "1.0", features = ["derive"] } 18 | -------------------------------------------------------------------------------- /examples/elixir/bloom_example/mix.exs: -------------------------------------------------------------------------------- 1 | defmodule BloomExample.MixProject do 2 | use Mix.Project 3 | 4 | def project do 5 | [ 6 | app: :bloom_example, 7 | version: "0.1.0", 8 | elixir: "~> 1.10", 9 | start_permanent: Mix.env() == :prod, 10 | deps: deps() 11 | ] 12 | end 13 | 14 | def application do 15 | [ 16 | extra_applications: [:logger] 17 | ] 18 | end 19 | 20 | defp deps do 21 | [ 22 | {:erbloom, "~> 2.1.0-rc.2"} 23 | ] 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /src/erbloom.app.src: -------------------------------------------------------------------------------- 1 | {application, erbloom, 2 | [{pkg_name, erbloom}, 3 | {description, "Fast Bloom Filter"}, 4 | {vsn, git}, 5 | {registered, []}, 6 | {applications, 7 | [kernel, 8 | stdlib 9 | ]}, 10 | {env,[]}, 11 | {modules, []}, 12 | 13 | {maintainers, ["Maxim Molchanov"]}, 14 | {licenses, ["MIT", "Apache-2.0"]}, 15 | {links, [{"GitHub", "https://github.com/Vonmo/erbloom"}]}, 16 | {files, [ 17 | "priv", "src", "crates", 18 | "rebar.config", 
"rebar.lock", 19 | "doc", "LICENSE", "README.md" 20 | ]} 21 | ]}. 22 | -------------------------------------------------------------------------------- /crates/bloom/src/container.rs: -------------------------------------------------------------------------------- 1 | use serde::{Deserialize, Serialize}; 2 | use options::FilterOptions; 3 | 4 | #[derive(Serialize, Deserialize, PartialEq, Clone)] 5 | pub struct RawSerializedFilter { 6 | pub payload: Vec, 7 | pub num_bits: u64, 8 | pub num_funs: u32, 9 | pub sip00: u64, 10 | pub sip01: u64, 11 | pub sip10: u64, 12 | pub sip11: u64, 13 | } 14 | 15 | #[derive(Serialize, Deserialize, PartialEq, Clone)] 16 | pub struct SerializedFilter { 17 | pub filters: Vec, 18 | pub opts: FilterOptions, 19 | pub upsert_num: u64, 20 | } 21 | -------------------------------------------------------------------------------- /crates/bloom/src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate bincode; 2 | extern crate bloomfilter; 3 | extern crate core; 4 | extern crate rustler; 5 | extern crate serde; 6 | extern crate siphasher; 7 | 8 | mod atoms; 9 | mod options; 10 | mod container; 11 | mod filter; 12 | mod nif; 13 | 14 | rustler::init!( 15 | "bloom_nif", 16 | [ 17 | nif::new, 18 | nif::serialize, 19 | nif::deserialize, 20 | nif::set, 21 | nif::vcheck, 22 | nif::clear, 23 | nif::check_and_set, 24 | nif::ftype, 25 | nif::check_serialized, 26 | ], 27 | load = nif::on_load 28 | ); 29 | -------------------------------------------------------------------------------- /examples/elixir/bloom_example/README.md: -------------------------------------------------------------------------------- 1 | # BloomExample 2 | 3 | **TODO: Add description** 4 | 5 | ## Installation 6 | 7 | If [available in Hex](https://hex.pm/docs/publish), the package can be installed 8 | by adding `bloom_example` to your list of dependencies in `mix.exs`: 9 | 10 | ```elixir 11 | def deps do 12 | [ 13 | {:bloom_example, 
"~> 0.1.0"} 14 | ] 15 | end 16 | ``` 17 | 18 | Documentation can be generated with [ExDoc](https://github.com/elixir-lang/ex_doc) 19 | and published on [HexDocs](https://hexdocs.pm). Once published, the docs can 20 | be found at [https://hexdocs.pm/bloom_example](https://hexdocs.pm/bloom_example). 21 | 22 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | 11 | test: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | container: 16 | image: erlang:23.0 17 | 18 | steps: 19 | - uses: actions/checkout@v2 20 | - uses: actions-rs/toolchain@v1 21 | with: 22 | toolchain: stable 23 | - name: Compile 24 | run: rebar3 compile 25 | - name: Run EUnit tests 26 | run: rebar3 as test eunit 27 | - name: Run CT tests 28 | run: rebar3 as test ct --spec ./test/spec.spec --sname tests --readable true --basic_html false 29 | -------------------------------------------------------------------------------- /examples/elixir/bloom_example/.gitignore: -------------------------------------------------------------------------------- 1 | # The directory Mix will write compiled artifacts to. 2 | /_build/ 3 | 4 | # If you run "mix test --cover", coverage assets end up here. 5 | /cover/ 6 | 7 | # The directory Mix downloads your dependencies sources to. 8 | /deps/ 9 | 10 | # Where third-party dependencies like ExDoc output generated docs. 11 | /doc/ 12 | 13 | # Ignore .fetch files in case you like to edit your project deps locally. 14 | /.fetch 15 | 16 | # If the VM crashes, it generates a dump, let's ignore it too. 17 | erl_crash.dump 18 | 19 | # Also ignore archive artifacts (built via "mix archive.build"). 20 | *.ez 21 | 22 | # Ignore package tarball (built via "mix hex.build"). 
23 | bloom_example-*.tar 24 | 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Maxim Molchanov 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /docker.mk: -------------------------------------------------------------------------------- 1 | ERL = $(shell which erl) 2 | 3 | ifeq ($(ERL),) 4 | $(error "Erlang not available on this system") 5 | endif 6 | 7 | REBAR = @`which rebar3` 8 | 9 | DEPS_PLT=$(CURDIR)/.deps_plt 10 | DEPS=erts kernel stdlib 11 | # export QUIET:=1 12 | # export DEBUG:=1 13 | export REBAR_COLOR:="low" 14 | 15 | 16 | # use to override vars for your platform 17 | ifeq (env.mk,$(wildcard env.mk)) 18 | include env.mk 19 | endif 20 | 21 | .PHONY: deps doc 22 | 23 | all: compile 24 | 25 | compile: 26 | $(REBAR) compile 27 | 28 | tests: 29 | $(REBAR) as test ct --spec ./test/spec.spec 30 | 31 | tests_cover: 32 | $(REBAR) as test ct --spec ./test/spec.spec --cover 33 | 34 | cover: 35 | @$(REBAR) cover 36 | @xdg-open "_build/test/cover/index.html" 37 | 38 | doc: 39 | $(REBAR) edoc 40 | 41 | lint: 42 | $(REBAR) as lint lint 43 | 44 | xref: 45 | $(REBAR) as prod xref skip_deps=true 46 | 47 | dialyzer: 48 | $(REBAR) dialyzer skip_deps=true 49 | 50 | deps: 51 | $(REBAR) deps 52 | 53 | clean: 54 | $(REBAR) clean 55 | 56 | prod: 57 | $(REBAR) as prod release 58 | 59 | rel: compile 60 | $(REBAR) release 61 | 62 | run: rel 63 | $(REBAR) run 64 | 65 | auto: 66 | $(REBAR) auto 67 | 68 | attach: 69 | $(ERL) -name a@127.0.0.1 -remsh dev@127.0.0.1 -setcookie dev 70 | 71 | upgrade_rebar: 72 | $(REBAR) local upgrade 73 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | DOCKER = $(shell which docker) 2 | ifeq ($(DOCKER),) 3 | $(error "Docker not available on this system") 4 | endif 5 | 6 | DOCKER_COMPOSE = UID=`id -u` GID=`id -g` $(shell which docker-compose) 7 | ifeq ($(DOCKER_COMPOSE),) 8 | $(error "DockerCompose not available on this system") 9 | endif 10 | 11 | # use to override vars for your 
platform 12 | ifeq (env.mk,$(wildcard env.mk)) 13 | include env.mk 14 | endif 15 | 16 | .PHONY: deps doc test 17 | 18 | all: build_imgs up tests rel 19 | 20 | build_imgs: 21 | @echo "Update docker images..." 22 | @${DOCKER_COMPOSE} build 23 | 24 | up: 25 | @${DOCKER_COMPOSE} up -d 26 | 27 | down: 28 | @${DOCKER_COMPOSE} down 29 | 30 | tests: 31 | @echo "Testing..." 32 | @${DOCKER_COMPOSE} exec test bash -c "cd /project && make -f docker.mk tests" 33 | 34 | test: tests 35 | 36 | rel: 37 | @echo "Build release..." 38 | @${DOCKER_COMPOSE} exec test bash -c "cd /project && make -f docker.mk prod" 39 | 40 | doc: 41 | @echo "Generate docs..." 42 | @${DOCKER_COMPOSE} exec test bash -c "cd /project && make -f docker.mk doc" 43 | 44 | lint: 45 | @echo "Lint..." 46 | @${DOCKER_COMPOSE} exec test bash -c "cd /project && make -f docker.mk lint" 47 | 48 | xref: 49 | @echo "Xref analysis..." 50 | @${DOCKER_COMPOSE} exec test bash -c "cd /project && make -f docker.mk xref" 51 | 52 | dialyzer: 53 | @echo "Dialyzer..." 
54 | @${DOCKER_COMPOSE} exec test bash -c "cd /project && make -f docker.mk dialyzer" -------------------------------------------------------------------------------- /crates/bloom/build.rs: -------------------------------------------------------------------------------- 1 | use std::{env, fs::File, io::Write, path::Path}; 2 | 3 | fn main() { 4 | // Directory contain this build-script 5 | let here = env::var("CARGO_MANIFEST_DIR").unwrap(); 6 | // Host triple (arch of machine doing to build, not necessarily the arch we're building for) 7 | let host_triple = env::var("HOST").unwrap(); 8 | // Target triple (arch we're building for, not necessarily the arch we're building on) 9 | let target_triple = env::var("TARGET").unwrap(); 10 | // debug or release 11 | let profile = env::var("PROFILE").unwrap(); 12 | // We use target OS to determine if extension is `.so`, `.dll`, or `.dylib` 13 | let file_name = match env::var("CARGO_CFG_TARGET_OS").unwrap().as_str() { 14 | "windows" => "libbloom.dll", 15 | "macos" | "ios" => "libbloom.dylib", 16 | _ => "libbloom.so", 17 | }; 18 | 19 | // Location of libbloom 20 | let mut libpath = Path::new(&here).join("target"); 21 | if host_triple != target_triple { 22 | libpath = libpath.join(&target_triple); 23 | } 24 | libpath = libpath.join(&profile).join(&file_name); 25 | 26 | // Create file in `here` and write the path to the directory of 27 | // where to find libbloom 28 | let libpath_file_path = Path::new(&here).join("libpath"); 29 | let mut libpath_file = File::create(libpath_file_path).unwrap(); 30 | write!(libpath_file, "{}", libpath.to_str().unwrap()).unwrap(); 31 | } 32 | -------------------------------------------------------------------------------- /imgs/base/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM erlang:23.0 AS erlang 2 | FROM rust:1.43.1 AS rust 3 | FROM debian:10.4-slim 4 | 5 | LABEL author="elzor" 6 | LABEL maintainer="m.molchanov@vonmo.com" 7 | LABEL 
version="1.1" 8 | LABEL description="image for erbloom" 9 | 10 | ENV USER=erbloom 11 | ARG UID=1000 12 | ARG GID=1000 13 | ENV UID=${UID} 14 | ENV GID=${GID} 15 | RUN groupadd --gid $GID $USER \ 16 | && useradd --uid $UID --gid $GID --shell /bin/bash --create-home $USER 17 | RUN usermod -u $UID $USER && groupmod -g $GID $USER 18 | 19 | ENV TZ=UTC 20 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone 21 | 22 | COPY --from=erlang /usr/local/lib/erlang /usr/local/lib/erlang 23 | ENV PATH=/usr/local/lib/erlang/bin:$PATH 24 | 25 | COPY --from=rust /usr/local/rustup /usr/local/rustup 26 | COPY --from=rust /usr/local/cargo /usr/local/cargo 27 | RUN chown $GID:$UID /usr/local/rustup && chown $GID:$UID /usr/local/cargo 28 | 29 | ENV RUSTUP_HOME=/usr/local/rustup \ 30 | CARGO_HOME=/usr/local/cargo \ 31 | PATH=/usr/local/cargo/bin:$PATH 32 | 33 | RUN apt-get update && apt-get install --no-install-recommends -y \ 34 | apt-transport-https \ 35 | ca-certificates \ 36 | build-essential \ 37 | git \ 38 | openssl \ 39 | libssl-dev \ 40 | libncurses6 \ 41 | libncurses-dev \ 42 | locales \ 43 | && apt-get clean \ 44 | && rm -rf /var/lib/apt/lists /var/cache/apt \ 45 | && find /usr/local -name examples | xargs rm -rf 46 | 47 | ADD ./soft/rebar3 /usr/local/bin/rebar3 48 | RUN chmod a+rwx /usr/local/bin/rebar3 49 | 50 | USER $USER 51 | 52 | ADD ./docker-entry.sh /docker-entry.sh 53 | CMD ["/docker-entry.sh"] 54 | -------------------------------------------------------------------------------- /crates/bloom/src/options.rs: -------------------------------------------------------------------------------- 1 | use filter::FilterType; 2 | use rustler::{Decoder, NifResult, Term}; 3 | use serde::{Deserialize, Serialize}; 4 | 5 | #[derive(Serialize, Deserialize, PartialEq, Clone, Copy, Debug)] 6 | pub struct FilterOptions { 7 | pub filter_type: Option, 8 | pub bitmap_size: Option, 9 | pub items_count: Option, 10 | pub capacity: Option, 11 | pub rotate_at: Option, 12 
| pub fp_rate: Option, 13 | } 14 | 15 | impl Default for FilterOptions { 16 | fn default() -> FilterOptions { 17 | FilterOptions { 18 | filter_type: None, 19 | bitmap_size: None, 20 | items_count: None, 21 | capacity: None, 22 | rotate_at: None, 23 | fp_rate: None, 24 | } 25 | } 26 | } 27 | 28 | impl<'a> Decoder<'a> for FilterOptions { 29 | fn decode(term: Term<'a>) -> NifResult { 30 | let mut opts = Self::default(); 31 | use rustler::{Error, MapIterator}; 32 | for (key, value) in MapIterator::new(term).ok_or(Error::BadArg)? { 33 | match key.atom_to_string()?.as_ref() { 34 | "filter_type" => opts.filter_type = Some(value.decode()?), 35 | "bitmap_size" => opts.bitmap_size = Some(value.decode()?), 36 | "items_count" => opts.items_count = Some(value.decode()?), 37 | "capacity" => opts.capacity = Some(value.decode()?), 38 | "rotate_at" => opts.rotate_at = Some(value.decode()?), 39 | "fp_rate" => opts.fp_rate = Some(value.decode()?), 40 | _ => (), 41 | } 42 | } 43 | Ok(opts) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # erbloom 2 | Safe and Fast Bloom Filter + FBFs for Erlang 3 | 4 | ![CI](https://github.com/Vonmo/erbloom/workflows/CI/badge.svg?branch=master) 5 | 6 | [Online Documentation](https://hexdocs.pm/erbloom/) 7 | 8 | ## Features: 9 | * [Bloom filter structure](https://en.wikipedia.org/wiki/Bloom_filter) (type: `bloom`) 10 | * [Forgetful Bloom Filters](http://dprg.cs.uiuc.edu/docs/fbf_cac15/fbfpaper-2.pdf) (type: `fbf`) 11 | 12 | ## Supported OS: 13 | * linux 14 | * macos 15 | * windows 16 | 17 | ## Deps definition: 18 | mix.exs: 19 | `{:erbloom, "~> 2.1.0-rc.2"}` 20 | 21 | rebar.config: 22 | `{erbloom, "2.1.0-rc.2"}` 23 | 24 | erlang.mk: 25 | `dep_erbloom = hex 2.1.0-rc.2` 26 | 27 | ## Using as a lib 28 | 1. 
Add deps in rebar.config: 29 | ``` 30 | {deps, [ 31 | {erbloom, ".*", {git, "https://github.com/Vonmo/erbloom.git", {tag, "v2.0.2"}}} 32 | ]} 33 | ``` 34 | 2. Now you can create a new filter instance: 35 | `{ok, Filter} = bloom:new(9585059,1000000).` 36 | or a filter with a desired false-positive rate: 37 | `bloom:new_optimal(1000000, 0.55).` 38 | 3. Create a new forgetful filter: 39 | `{ok, Filter} = bloom:new_forgetful(BitmapSize, ItemsCount, NumFilters, RotateAfter).` 40 | or with fp_rate: 41 | `bloom:new_forgetful_optimal(ItemsCount, NumFilters, RotateAfter, FpRate).` 42 | 4. Set a new element 43 | `bloom:set(Filter, "somekey").` 44 | 5. Check for an element 45 | `bloom:check(Filter, "anotherkey").` 46 | 6. Serialize 47 | `{ok, Binary} = bloom:serialize(Filter).` 48 | 7. Deserialize 49 | `bloom:deserialize(Binary).` 50 | 51 | ## Docker environment 52 | * `make build_imgs` - build docker images 53 | * `make up` - run sandbox 54 | * `make down` - terminate sandbox 55 | * `make tests` - run tests 56 | * `make lint` - linter 57 | * `make xref` - xref analysis 58 | * `make rel` - generate release for target 59 | * `make doc` - generate documentation from EDoc 60 | 61 | ## 62 | Without docker you must install erlang >=20.1 and rust >=1.23 on your machine. After that, you can run these targets: 63 | **release:** 64 | `rebar3 as prod release` 65 | 66 | **test:** 67 | `rebar3 as test ct` 68 | -------------------------------------------------------------------------------- /doc/overview.edoc: -------------------------------------------------------------------------------- 1 | -*- html -*- 2 | 3 | erbloom overview page 4 | 5 | 6 | @author Maxim Molchanov 7 | @copyright 2018-2020 Maxim Molchanov 8 | @version 2.1.0 9 | @title Safe and Fast Bloom Filter 10 | @doc erbloom is a safe and fast Bloom filter (Including Forgetful filters and False Positive Rate option) for Erlang/Elixir 11 | 12 | Updated: {@date} 13 | 14 | == Contents == 15 | 16 |
    17 |
  1. {@section Using as a lib}
  2. 18 |
  3. {@section API overview}
  4. 19 |
  5. {@section Docker-based sandbox}
  6. 20 |
  7. {@section Contributors}
  8. 21 |
22 | 23 | == Using as a lib in Erlang == 24 | 25 | rebar.config:
26 | {erbloom, "2.1.0-rc.0"} 27 |
or
28 | {erbloom, ".*", {git, "https://github.com/Vonmo/erbloom.git", {tag, "v2.1.0"}}} 29 | 30 | erlang.mk:
31 | dep_erbloom = hex 2.1.0-rc.0 32 | 33 | mix.exs:
34 | {:erbloom, "~> 2.1.0-rc.0"} 35 | 36 | == Workflow == 37 | Now you can create a new filter instance:
38 | {ok, Filter} = {@link bloom:new/2. bloom:new}(9585059,1000000). 39 | or a filter with a desired false-positive rate: {@link bloom:new_optimal/2. bloom:new_optimal}(1000000, 0.55). 40 | 41 | Create a new forgetful filter:
42 | {ok, Filter} = {@link bloom:new_forgetful/4. bloom:new_forgetful}(BitmapSize, ItemsCount, NumFilters, RotateAfter). 43 | or with fp_rate: {@link bloom:new_forgetful_optimal/4. bloom:new_forgetful_optimal}(ItemsCount, NumFilters, RotateAfter, FpRate). 44 | 45 | Set a new element:
46 | ok | true = {@link bloom:set/2. bloom:set}(Filter, "somekey"). 47 | 48 | Check for an element:
49 | true | false = {@link bloom:check/2. bloom:check}(Filter, "anotherkey"). 50 | 51 | Serialize:
52 | {ok, Binary} = {@link bloom:serialize/1. bloom:serialize}(Filter). 53 | 54 | Deserialize:
55 | {@link bloom:deserialize/1. bloom:deserialize}(Binary). 56 | 57 | == Docker-based sandbox == 58 | 59 | make build_imgs - build docker images 60 | 61 | make up - run sandbox 62 | 63 | make down - terminate sandbox 64 | 65 | make tests - run tests 66 | 67 | make lint - linter 68 | 69 | make xref - xref analysis 70 | 71 | make prod - generate release for target 72 | 73 | make doc - generate documentation from EDoc 74 | 75 | == Contributors == 76 | -------------------------------------------------------------------------------- /crates/bloom/src/filter/mod.rs: -------------------------------------------------------------------------------- 1 | mod bloom; 2 | mod forgetful; 3 | 4 | pub use self::{bloom::BloomFilter, forgetful::ForgetfulFilter}; 5 | use crate::{container::SerializedFilter, options::FilterOptions}; 6 | use rustler::{Atom, Decoder, Encoder, Env, Error, NifResult, Term}; 7 | use serde::{Deserialize, Serialize}; 8 | 9 | type Result = std::result::Result; 10 | 11 | pub enum Filter { 12 | Bloom(BloomFilter), 13 | Forgetful(ForgetfulFilter), 14 | } 15 | 16 | impl Filter { 17 | pub fn new(opts: FilterOptions) -> Result { 18 | Ok(match opts.filter_type.ok_or("`filter_type` not set")? { 19 | FilterType::Forgetful => Filter::Forgetful(ForgetfulFilter::new(opts)?), 20 | FilterType::Bloom => Filter::Bloom(BloomFilter::new(opts)?), 21 | }) 22 | } 23 | 24 | pub fn filter_type(&self) -> FilterType { 25 | match self { 26 | Self::Bloom(_) => FilterType::Bloom, 27 | Self::Forgetful(_) => FilterType::Forgetful, 28 | } 29 | } 30 | 31 | pub fn serialize(&self) -> Result> { 32 | match self { 33 | Self::Bloom(filt) => filt.serialize(), 34 | Self::Forgetful(filt) => filt.serialize(), 35 | } 36 | } 37 | 38 | pub fn restore(prev_filter: SerializedFilter) -> Result { 39 | Ok( 40 | match prev_filter 41 | .opts 42 | .filter_type 43 | .ok_or("`filter_type` not set")? 
44 | { 45 | FilterType::Bloom => Self::Bloom(BloomFilter::restore(prev_filter)), 46 | FilterType::Forgetful => Self::Forgetful(ForgetfulFilter::restore(prev_filter)), 47 | }, 48 | ) 49 | } 50 | 51 | pub fn clear(&mut self) { 52 | match self { 53 | Self::Bloom(filt) => filt.clear(), 54 | Self::Forgetful(filt) => filt.clear(), 55 | } 56 | } 57 | 58 | pub fn check(&self, key: &[u8]) -> bool { 59 | match self { 60 | Self::Bloom(filt) => filt.check(key), 61 | Self::Forgetful(filt) => filt.check(key), 62 | } 63 | } 64 | } 65 | 66 | #[derive(Debug, Serialize, Deserialize, PartialEq, Clone, Copy)] 67 | pub enum FilterType { 68 | Bloom, 69 | Forgetful, 70 | } 71 | 72 | impl<'a> Decoder<'a> for FilterType { 73 | fn decode(term: Term<'a>) -> NifResult { 74 | use crate::atoms; 75 | let ft = Atom::from_term(term)?; 76 | if ft == atoms::bloom() { 77 | Ok(Self::Bloom) 78 | } else if ft == atoms::fbf() { 79 | Ok(Self::Forgetful) 80 | } else { 81 | Err(Error::RaiseTerm(Box::new(atoms::wrong_filter_type()))) 82 | } 83 | } 84 | } 85 | 86 | impl Encoder for FilterType { 87 | fn encode<'a>(&self, env: Env<'a>) -> Term<'a> { 88 | match self { 89 | Self::Bloom => crate::atoms::bloom(), 90 | Self::Forgetful => crate::atoms::fbf(), 91 | } 92 | .encode(env) 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/bloom_nif.erl: -------------------------------------------------------------------------------- 1 | %% @doc 2 | %% This is a NIF wrapper around [https://crates.io/crates/bloomfilter], 3 | %% a simple but fast Bloom filter implementation, that requires only 2 hash functions, 4 | %% generated with SipHash-1-3 using randomized keys. 5 | %% @end 6 | 7 | -module(bloom_nif). 8 | %% API 9 | -export([ 10 | new/1, 11 | serialize/1, 12 | deserialize/1, 13 | set/2, 14 | check/2, 15 | check_and_set/2, 16 | clear/1, 17 | ftype/1 18 | ]). 19 | 20 | %% Native library support 21 | -export([load/0]). 22 | -on_load(load/0). 
23 | 24 | %% rev this if the internal structure of the bloom filter changes 25 | -define(ERBLOOM_VERSION1, 1). 26 | 27 | -type serialized_bloom() :: binary(). 28 | -opaque bloom() :: reference(). 29 | -export_type([bloom/0, serialized_bloom/0]). 30 | 31 | %% @doc Create a new filter structure. 32 | -spec new(_Opts :: map()) -> {ok, Bloom :: bloom()} | {error, Reason :: binary()}. 33 | new(_Opts) -> 34 | not_loaded(?LINE). 35 | 36 | %% @doc Get type of filter 37 | -spec ftype(_Ref :: bloom()) -> number() | {error, Reason :: binary()}. 38 | ftype(_Ref) -> 39 | not_loaded(?LINE). 40 | 41 | %% @doc Serialize a bloom filter to Erlang terms. `check/2' can be used against this serialized form efficiently. 42 | -spec serialize(Bloom :: bloom()) -> {ok, serialized_bloom()}. 43 | serialize(_Ref) -> 44 | not_loaded(?LINE). 45 | 46 | %% @doc Deserialize a previously serialized bloom filter back into a bloom filter reference. 47 | -spec deserialize(serialized_bloom()) -> {ok, bloom()}. 48 | deserialize(_SerializedBloom) -> 49 | not_loaded(?LINE). 50 | 51 | %% @doc Record the presence of `Key' in `Bloom'. 52 | -spec set(Bloom :: bloom(), Key :: term()) -> ok. 53 | set(_Ref, _Key) -> 54 | not_loaded(?LINE). 55 | 56 | %% @doc Check for the presence of `Key' in `Bloom'. 57 | %% Serialized and binary encoded bloom filters can be used with this 58 | %% function when you wish to check for the key and do not need to use set 59 | %% (e.g. a static bloom filter stored in a database). 60 | -spec check(bloom() | serialized_bloom(), term()) -> boolean(). 61 | check(SerializedBloom, Key) when is_binary(SerializedBloom) -> 62 | check_serialized(SerializedBloom, Key); 63 | check(Bloom, Key) -> 64 | vcheck(Bloom, Key). 65 | vcheck(_Bloom, _Key) when is_reference(_Bloom) -> 66 | not_loaded(?LINE). 67 | 68 | %% @doc Record the presence of `Key' in `Bloom' and return whether it was present before. 69 | -spec check_and_set(Bloom :: bloom(), Key :: term()) -> boolean().
70 | check_and_set(_Ref, _Key) -> 71 | not_loaded(?LINE). 72 | 73 | %% @doc Clear all of the bits in the filter, removing all keys from the set. 74 | -spec clear(Bloom :: bloom()) -> ok. 75 | clear(_Ref) -> 76 | not_loaded(?LINE). 77 | 78 | -spec check_serialized(Bloom :: bloom(), Key :: term()) -> boolean(). 79 | check_serialized(_Ref, _Key) -> 80 | not_loaded(?LINE). 81 | 82 | %% @private 83 | load() -> 84 | erlang:load_nif(filename:join(priv(), "libbloom"), none). 85 | 86 | not_loaded(Line) -> 87 | erlang:nif_error({error, {not_loaded, [{module, ?MODULE}, {line, Line}]}}). 88 | 89 | priv() -> 90 | case code:priv_dir(?MODULE) of 91 | {error, _} -> 92 | EbinDir = filename:dirname(code:which(?MODULE)), 93 | AppPath = filename:dirname(EbinDir), 94 | filename:join(AppPath, "priv"); 95 | Path -> 96 | Path 97 | end. 98 | -------------------------------------------------------------------------------- /test/forgetful_SUITE.erl: -------------------------------------------------------------------------------- 1 | -module(forgetful_SUITE). 2 | -compile(export_all). 3 | -import(ct_helper, [config/2]). 4 | -include_lib("common_test/include/ct.hrl"). 5 | -include_lib("eunit/include/eunit.hrl"). 6 | 7 | all() -> 8 | [ 9 | {group, forgetful} 10 | ]. 11 | 12 | groups() -> 13 | [ 14 | 15 | {forgetful, 16 | [parallel, shuffle], 17 | [new, serialize, deserialize, set_and_check, clear]} 18 | ]. 19 | 20 | 21 | %% ============================================================================= 22 | %% init 23 | %% ============================================================================= 24 | init_per_group(_Group, Config) -> 25 | ok = application:load(erbloom), 26 | {ok, _} = application:ensure_all_started(erbloom, temporary), 27 | [{init, true} | Config]. 
%% =============================================================================
%% end
%% =============================================================================
end_per_group(_Group, _Config) ->
    %% stop first, then unload; each step must answer ok
    lists:foreach(fun(Step) -> ok = application:Step(erbloom) end, [stop, unload]).


%% =============================================================================
%% group: forgetful
%% =============================================================================

%% Every case works on the same shape of filter:
%% bitmap size 50, 80 items, 3 inner filters, rotation after every insertion.
forgetful_filter() ->
    {ok, Filter} = bloom:new_forgetful(50, 80, 3, 1),
    Filter.

new(_) ->
    Filter = forgetful_filter(),
    ?assert(is_reference(Filter)),
    ?assertEqual(1, bloom:type(Filter)),
    ok.

serialize(_) ->
    {ok, Blob} = bloom:serialize(forgetful_filter()),
    ?assert(is_binary(Blob)),
    ok.

deserialize(_) ->
    Original = forgetful_filter(),
    {ok, _} = bloom:serialize(Original),
    Key = "test_key",
    ?assertEqual(false, bloom:check(Original, Key)),
    %% first insertion reports "not seen before", the second one "seen"
    ?assertEqual(false, bloom:set(Original, Key)),
    ?assertEqual(true, bloom:set(Original, Key)),
    ?assertEqual(true, bloom:check(Original, Key)),
    {ok, Blob} = bloom:serialize(Original),
    ?assert(is_binary(Blob)),
    {ok, CopyA} = bloom:deserialize(Blob),
    ?assertEqual(1, bloom:type(CopyA)),
    {ok, CopyB} = bloom:deserialize(Blob),
    %% clearing the original must not affect the restored copies
    ?assertEqual(ok, bloom:clear(Original)),
    ?assertEqual(false, bloom:check(Original, Key)),
    ?assertEqual(true, bloom:check(CopyA, Key)),
    ?assertEqual(true, bloom:check(CopyB, Key)),
    ok.

set_and_check(_) ->
    Filter = forgetful_filter(),
    Keys = [<<"k1">>, <<"k2">>, <<"k3">>, <<"k4">>, <<"k5">>, <<"k6">>],
    Present = fun() -> [K || K <- Keys, bloom:check(Filter, K)] end,
    ?assertEqual([], Present()),
    %% {key to insert, keys expected to be reported afterwards}
    Steps = [
        {<<"k1">>, [<<"k1">>]},
        {<<"k2">>, [<<"k1">>, <<"k2">>]},
        {<<"k3">>, [<<"k1">>, <<"k2">>, <<"k3">>]},
        {<<"k4">>, [<<"k2">>, <<"k3">>, <<"k4">>]},
        {<<"k5">>, [<<"k3">>, <<"k4">>, <<"k5">>]},
        {<<"k6">>, [<<"k4">>, <<"k5">>, <<"k6">>]}
    ],
    lists:foreach(
        fun({Key, Expected}) ->
            ?assertEqual(false, bloom:set(Filter, Key)),
            ?assertEqual(Expected, Present())
        end,
        Steps),
    ok.

clear(_) ->
    Key = "binkeyfortest",
    Filter = forgetful_filter(),
    ?assertEqual(false, bloom:set(Filter, Key)),
    ?assertEqual(true, bloom:set(Filter, Key)),
    ?assertEqual(true, bloom:check(Filter, Key)),
    ?assertEqual(ok, bloom:clear(Filter)),
    ?assertEqual(false, bloom:check(Filter, Key)),
    ok.
106 | 107 | %% ============================================================================= 108 | %% helpers 109 | %% ============================================================================= -------------------------------------------------------------------------------- /crates/bloom/src/filter/bloom.rs: -------------------------------------------------------------------------------- 1 | use super::FilterType; 2 | use bloomfilter::Bloom; 3 | use container::{RawSerializedFilter, SerializedFilter}; 4 | use options::FilterOptions; 5 | use siphasher::sip::SipHasher13; 6 | use std::hash::Hash; 7 | use std::hash::Hasher; 8 | 9 | type Result = std::result::Result; 10 | 11 | pub struct BloomFilter { 12 | pub filter: Bloom<[u8]>, 13 | } 14 | 15 | impl BloomFilter { 16 | pub fn new(opts: FilterOptions) -> Result { 17 | assert_eq!(opts.filter_type, Some(FilterType::Bloom)); 18 | Ok(BloomFilter { 19 | filter: match opts { 20 | FilterOptions { 21 | bitmap_size: None, 22 | items_count: Some(items_count), 23 | fp_rate: Some(fp_rate), 24 | .. 25 | } => Bloom::new_for_fp_rate(items_count, fp_rate), 26 | FilterOptions { 27 | bitmap_size: Some(bitmap_size), 28 | items_count: Some(items_count), 29 | .. 
30 | } => Bloom::new(bitmap_size, items_count), 31 | _ => { 32 | return Err(format!( 33 | "must set `items_count` AND (`fp_rate` OR `bitmap_size`)], got {:?}", 34 | opts 35 | )) 36 | } 37 | }, 38 | }) 39 | } 40 | 41 | pub fn set(&mut self, key: &[u8]) { 42 | self.filter.set(key) 43 | } 44 | 45 | pub fn check(&self, key: &[u8]) -> bool { 46 | self.filter.check(key) 47 | } 48 | 49 | pub fn check_serialized(&self, filter: SerializedFilter, key: &[u8]) -> bool { 50 | let pf = &filter.filters.to_vec()[0]; 51 | let sips = [ 52 | SipHasher13::new_with_keys(pf.sip00, pf.sip01), 53 | SipHasher13::new_with_keys(pf.sip10, pf.sip11), 54 | ]; 55 | let mut hashes = [0u64, 0u64]; 56 | for k_i in 0..pf.num_funs { 57 | let bit_offset = (bloom_hash(&mut hashes, &key, k_i, &sips) % pf.num_bits) as usize; 58 | let byte_offset = bit_offset / 8; 59 | let bit = 7 - (bit_offset % 8); 60 | if (pf.payload[byte_offset] >> bit) & 1 != 1 { 61 | return false; 62 | } 63 | } 64 | true 65 | } 66 | 67 | pub fn check_and_set(&mut self, key: &[u8]) -> bool { 68 | self.filter.check_and_set(key) 69 | } 70 | 71 | pub fn clear(&mut self) { 72 | self.filter.clear(); 73 | } 74 | 75 | pub fn serialize(&self) -> Result> { 76 | let mut opts = FilterOptions::default(); 77 | opts.filter_type = Some(FilterType::Bloom); 78 | 79 | let sips = self.filter.sip_keys(); 80 | let bitmap = self.filter.bitmap(); 81 | match bincode::serialize(&SerializedFilter { 82 | filters: vec![RawSerializedFilter { 83 | payload: bitmap, 84 | num_bits: self.filter.number_of_bits(), 85 | num_funs: self.filter.number_of_hash_functions(), 86 | sip00: sips[0].0, 87 | sip01: sips[0].1, 88 | sip10: sips[1].0, 89 | sip11: sips[1].1, 90 | }], 91 | opts, 92 | upsert_num: 0, 93 | }) { 94 | Ok(res) => Ok(res), 95 | Err(e) => Err(format!("bincode serialization failed with: {}", e)), 96 | } 97 | } 98 | 99 | pub fn restore(prev_filter: SerializedFilter) -> BloomFilter { 100 | let pf = &prev_filter.filters.to_vec()[0]; 101 | 102 | BloomFilter { 103 | 
filter: Bloom::from_existing( 104 | &pf.payload, 105 | pf.num_bits, 106 | pf.num_funs, 107 | [(pf.sip00, pf.sip01), (pf.sip10, pf.sip11)], 108 | ), 109 | } 110 | } 111 | } 112 | 113 | // helper for check_serialized, extracted from the bloom crate source code 114 | fn bloom_hash(hashes: &mut [u64; 2], item: &[u8], k_i: u32, sips: &[SipHasher13; 2]) -> u64 { 115 | if k_i < 2 { 116 | let mut sip = sips[k_i as usize]; 117 | item.hash(&mut sip); 118 | let hash = sip.finish(); 119 | hashes[k_i as usize] = hash; 120 | hash 121 | } else { 122 | hashes[0].wrapping_add((u64::from(k_i)).wrapping_mul(hashes[1]) % 0xffff_ffff_ffff_ffc5) 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /test/bloom_SUITE.erl: -------------------------------------------------------------------------------- 1 | -module(bloom_SUITE). 2 | -compile(export_all). 3 | -import(ct_helper, [config/2]). 4 | -include_lib("common_test/include/ct.hrl"). 5 | -include_lib("eunit/include/eunit.hrl"). 6 | 7 | all() -> 8 | [ 9 | {group, bloom}, 10 | {group, perf} 11 | ]. 12 | 13 | groups() -> 14 | [ 15 | {bloom, 16 | [parallel, shuffle], 17 | [new, serialize, deserialize, set, check, clear, check_and_set, check_serialized]}, 18 | {perf, 19 | [shuffle], 20 | [perf_sequential_csc, perf_parallel_read, perf_sequential_csc_large]} 21 | ]. 22 | 23 | 24 | %% ============================================================================= 25 | %% init 26 | %% ============================================================================= 27 | init_per_group(_Group, Config) -> 28 | ok = application:load(erbloom), 29 | {ok, _} = application:ensure_all_started(erbloom, temporary), 30 | [{init, true} | Config]. 
%% =============================================================================
%% end
%% =============================================================================
end_per_group(_Group, _Config) ->
    %% stop first, then unload; each step must answer ok
    lists:foreach(fun(Step) -> ok = application:Step(erbloom) end, [stop, unload]).


%% =============================================================================
%% group: bloom
%% =============================================================================

%% The functional cases all use a filter created with bloom:new(10, 80).
small_filter() ->
    {ok, Filter} = bloom:new(10, 80),
    Filter.

new(_) ->
    Filter = small_filter(),
    ?assert(is_reference(Filter)),
    ?assertEqual(0, bloom:type(Filter)),
    ok.

serialize(_) ->
    {ok, Blob} = bloom:serialize(small_filter()),
    ?assert(is_binary(Blob)),
    ok.

deserialize(_) ->
    Original = small_filter(),
    {ok, _} = bloom:serialize(Original),
    Key = "test_key",
    ?assertEqual(false, bloom:check(Original, Key)),
    ?assertEqual(ok, bloom:set(Original, Key)),
    ?assertEqual(true, bloom:check(Original, Key)),
    {ok, Blob} = bloom:serialize(Original),
    ?assert(is_binary(Blob)),
    {ok, CopyA} = bloom:deserialize(Blob),
    {ok, CopyB} = bloom:deserialize(Blob),
    %% clearing the original must not affect the restored copies
    ?assertEqual(ok, bloom:clear(Original)),
    ?assertEqual(false, bloom:check(Original, Key)),
    ?assertEqual(true, bloom:check(CopyA, Key)),
    ?assertEqual(true, bloom:check(CopyB, Key)),
    ok.

set(_) ->
    ?assertEqual(ok, bloom:set(small_filter(), "binkeyfortest")),
    ok.

check(_) ->
    Key = "binkeyfortest",
    Filter = small_filter(),
    ?assertEqual(false, bloom:check(Filter, Key)),
    ?assertEqual(ok, bloom:set(Filter, Key)),
    ?assertEqual(true, bloom:check(Filter, Key)),
    ?assertEqual(false, bloom:check(Filter, "unknown_key")),
    %% we can check a serialized bloom
    {ok, Blob} = bloom:serialize(Filter),
    ?assert(is_binary(Blob)),
    ?assertEqual(true, bloom:check(Blob, Key)),
    ok.

check_and_set(_) ->
    Key = "binkeyfortest",
    Filter = small_filter(),
    ?assertEqual(false, bloom:check_and_set(Filter, Key)),
    ?assertEqual(true, bloom:check(Filter, Key)),
    ?assertEqual(true, bloom:check_and_set(Filter, Key)),
    %% the result of clear/1 is deliberately not matched here
    bloom:clear(Filter),
    ?assertEqual(false, bloom:check(Filter, Key)),
    ?assertEqual(false, bloom:check_and_set(Filter, Key)),
    ok.

check_serialized(_) ->
    Key = "some_key_to_check",
    Filter = small_filter(),
    ?assertEqual(ok, bloom:set(Filter, Key)),
    {ok, Blob} = bloom:serialize(Filter),
    ?assert(is_binary(Blob)),
    %% the reference and its serialized form must agree
    ?assertEqual(true, bloom:check(Filter, Key)),
    ?assertEqual(false, bloom:check(Filter, "unknown_key")),
    ?assertEqual(true, bloom:check(Blob, Key)),
    ?assertEqual(false, bloom:check(Blob, "unknown_key")),
    ok.

clear(_) ->
    Key = "binkeyfortest",
    Filter = small_filter(),
    ?assertEqual(ok, bloom:set(Filter, Key)),
    ?assertEqual(true, bloom:check(Filter, Key)),
    ?assertEqual(ok, bloom:clear(Filter)),
    ?assertEqual(false, bloom:check(Filter, Key)),
    ok.

%% =============================================================================
%% group: perf
%% =============================================================================
perf_sequential_csc(_) ->
    %% create + set + check on a fresh filter for every iteration
    Cycle =
        fun() ->
            {ok, Ref} = bloom:new(10, 80),
            Key = uuid:uuid_to_string(uuid:get_v4()),
            ok = bloom:set(Ref, Key),
            true = bloom:check(Ref, Key)
        end,
    Rate = perftest:sequential(1000, Cycle),
    ?assert(Rate > 3000),
    ok.

perf_sequential_csc_large(_) ->
    %% one large filter shared by all iterations
    {ok, Ref} = bloom:new(9585059, 1000000),
    Cycle =
        fun() ->
            Key = uuid:uuid_to_string(uuid:get_v4()),
            ok = bloom:set(Ref, Key),
            true = bloom:check(Ref, Key),
            ok
        end,
    Rate = perftest:sequential(100, Cycle),
    ?assert(Rate > 3000),
    ok.
150 | 151 | perf_parallel_read(_) -> 152 | Key = uuid:uuid_to_string(uuid:get_v4()), 153 | {ok, Ref} = bloom:new(10, 80), 154 | ok = bloom:set(Ref, Key), 155 | R = perftest:comprehensive(1000, 156 | fun() -> 157 | true = bloom:check(Ref, Key) 158 | end), 159 | true = lists:all(fun(E) -> E >= 3500 end, R), 160 | ok. 161 | 162 | %% ============================================================================= 163 | %% helpers 164 | %% ============================================================================= 165 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {minimum_otp_vsn, "20.1"}. 2 | {base_dir, "_build"}. 3 | {deps_dir, "lib"}. 4 | {root_dir, "."}. 5 | {checkouts_dir, "_checkouts"}. 6 | {plugins_dir, "plugins"}. 7 | {erl_opts, [ 8 | debug_info, 9 | warnings_as_errors, 10 | {platform_define, "(linux|solaris|freebsd|darwin)", 'HAVE_SENDFILE'}, 11 | {platform_define, "(linux|freebsd)", 'BACKLOG', 128} 12 | ]}. 13 | {validate_app_modules, true}. 14 | {cover_enabled, true}. 15 | 16 | {provider_hooks, [ 17 | {post, [ 18 | {compile, {appup, compile}}, 19 | {clean, {appup, clean}} 20 | ]} 21 | ]}. 22 | 23 | {dist_node, [ 24 | {setcookie, 'dev'}, 25 | {name, 'dev@127.0.0.1'} 26 | ]}. 27 | 28 | {deps, [ 29 | ]}. 30 | 31 | {xref_warnings,false}. 32 | {xref_extra_paths,[]}. 33 | {xref_checks,[undefined_function_calls,undefined_functions,locals_not_used, 34 | exports_not_used,deprecated_function_calls, 35 | deprecated_functions]}. 36 | %%{xref_queries,[{"(xc - uc) || (xu - x - b - (\"mod\":\".*foo\"/\"4\"))", []}]}. 37 | %%{xref_ignores,[{M, F}, {M, F, A}]}. 38 | 39 | {dialyzer, [{warnings, [unmatched_returns, error_handling]}, 40 | {get_warnings, true}, 41 | {plt_apps, top_level_deps}, 42 | {plt_location, local}, 43 | {plt_prefix, "erbloom"}]}. 
44 | 45 | {relx, [ 46 | {release, {erbloom, "0.1.0"}, [ 47 | erbloom 48 | ]}, 49 | {dev_mode, true}, 50 | {include_src, false}, 51 | {include_erts, false}, 52 | {extended_start_script, true}, 53 | {vm_args, "config/vm.args"}, 54 | {sys_config, "config/sys.config"} 55 | ]}. 56 | 57 | {erl_opts, [no_debug_info, warnings_as_errors]}. 58 | {relx, [{dev_mode, false}, {include_erts, true}, {include_src, false}]}. 59 | {pre_hooks, [ 60 | {compile, "cargo build --manifest-path=crates/bloom/Cargo.toml --release"}, 61 | {compile, "sh -c \"cp $(cat crates/bloom/libpath) priv/libbloom.so\""} 62 | ]}. 63 | 64 | {profiles, [ 65 | {prod, [ 66 | {erl_opts, [no_debug_info, warnings_as_errors]}, 67 | {relx, [{dev_mode, false}, {include_erts, true}, {include_src, false}]}, 68 | {pre_hooks, [ 69 | {compile, "cargo build --manifest-path=crates/bloom/Cargo.toml --release"}, 70 | {compile, "sh -c \"cp $(cat crates/bloom/libpath) priv/libbloom.so\""} 71 | ]} 72 | ]}, 73 | 74 | {test, [ 75 | {deps, [ 76 | {perftest, ".*", {git, "https://github.com/Elzor/perftest.git", {branch, "master"}}}, 77 | {uuid, ".*", {git, "https://github.com/okeuday/uuid.git"}} 78 | ]}, 79 | {erl_opts, [debug_info, warnings_as_errors, nowarn_export_all]}, 80 | {relx, [{dev_mode, true}, {include_erts, false}, {include_src, false}]}, 81 | {pre_hooks, [ 82 | {compile, "cargo build --manifest-path=crates/bloom/Cargo.toml --release"}, 83 | {compile, "sh -c \"cp $(cat crates/bloom/libpath) priv/libbloom.so\""} 84 | ]} 85 | ]}, 86 | 87 | {lint, [ 88 | {plugins, [ 89 | {rebar3_lint, {git, "https://github.com/project-fifo/rebar3_lint.git", {tag, "0.1.2"}}} 90 | ]} 91 | ]} 92 | ]}. 93 | 94 | {plugins, [ 95 | rebar3_auto, 96 | rebar3_appup_plugin, 97 | rebar3_vendor, 98 | rebar3_hex 99 | ]}. 100 | 101 | 102 | {elvis_output_format, colors}. 
103 | {elvis, 104 | [#{dirs => ["apps/*/src", "src"], 105 | filter => "*.erl", 106 | rules => [{elvis_style, line_length, 107 | #{ignore => [], 108 | limit => 80, 109 | skip_comments => false}}, 110 | {elvis_style, no_tabs}, 111 | {elvis_style, no_trailing_whitespace}, 112 | {elvis_style, macro_names, #{ignore => []}}, 113 | {elvis_style, macro_module_names}, 114 | {elvis_style, operator_spaces, #{rules => [{right, ","}, 115 | {right, "++"}, 116 | {left, "++"}]}}, 117 | {elvis_style, nesting_level, #{level => 3}}, 118 | {elvis_style, god_modules, 119 | #{limit => 25, 120 | ignore => []}}, 121 | {elvis_style, no_if_expression}, 122 | {elvis_style, invalid_dynamic_call, 123 | #{ignore => []}}, 124 | {elvis_style, used_ignored_variable}, 125 | {elvis_style, no_behavior_info}, 126 | { 127 | elvis_style, 128 | module_naming_convention, 129 | #{regex => "^[a-z]([a-z0-9]*_?)*(_SUITE)?$", 130 | ignore => []} 131 | }, 132 | { 133 | elvis_style, 134 | function_naming_convention, 135 | #{regex => "^([a-z][a-z0-9]*_?)*$"} 136 | }, 137 | {elvis_style, state_record_and_type}, 138 | {elvis_style, no_spec_with_records}, 139 | {elvis_style, dont_repeat_yourself, #{min_complexity => 10}}, 140 | {elvis_style, no_debug_call, #{ignore => []}} 141 | ] 142 | }, 143 | #{dirs => ["."], 144 | filter => "Makefile", 145 | rules => [{elvis_project, no_deps_master_erlang_mk, #{ignore => []}}, 146 | {elvis_project, protocol_for_deps_erlang_mk, #{ignore => []}}] 147 | }, 148 | #{dirs => ["."], 149 | filter => "rebar.config", 150 | rules => [{elvis_project, no_deps_master_rebar, #{ignore => []}}, 151 | {elvis_project, protocol_for_deps_rebar, #{ignore => []}}] 152 | } 153 | ] 154 | }. 
155 | -------------------------------------------------------------------------------- /crates/bloom/src/filter/forgetful.rs: -------------------------------------------------------------------------------- 1 | use super::FilterType; 2 | use bloomfilter::Bloom; 3 | use container::{RawSerializedFilter, SerializedFilter}; 4 | use options::FilterOptions; 5 | 6 | type Result = std::result::Result; 7 | 8 | pub struct ForgetfulFilter { 9 | pub filters: Vec>, 10 | pub items_count: usize, 11 | pub capacity: usize, 12 | pub rotate_at: usize, 13 | pub insertion_count: usize, 14 | } 15 | 16 | impl ForgetfulFilter { 17 | pub fn new(opts: FilterOptions) -> Result { 18 | assert_eq!(opts.filter_type, Some(FilterType::Forgetful)); 19 | 20 | let capacity = opts.capacity.ok_or("capacity not set")?; 21 | let rotate_at = opts.rotate_at.ok_or("rotate_at not set")?; 22 | let items_count = opts.items_count.ok_or("items_count not set")?; 23 | 24 | let mut filters = Vec::with_capacity(capacity); 25 | for _ in 0..capacity { 26 | let filter = match opts { 27 | FilterOptions { 28 | bitmap_size: Some(bitmap_size), 29 | items_count: Some(items_count), 30 | fp_rate: None, 31 | .. 32 | } => Bloom::new(bitmap_size, items_count), 33 | FilterOptions { 34 | bitmap_size: None, 35 | items_count: Some(items_count), 36 | fp_rate: Some(fp_rate), 37 | .. 
38 | } => Bloom::new_for_fp_rate(items_count, fp_rate), 39 | _ => { 40 | return Err(format!( 41 | "must set `items_count` AND (`fp_rate` OR `bitmap_size`)], got {:?}", 42 | opts 43 | )) 44 | } 45 | }; 46 | filters.push(filter) 47 | } 48 | 49 | Ok(ForgetfulFilter { 50 | filters, 51 | items_count, 52 | capacity, 53 | rotate_at, 54 | insertion_count: 0, 55 | }) 56 | } 57 | 58 | pub fn set(&mut self, key: &[u8]) -> bool { 59 | let num_inner_filters = self.filters.len(); 60 | 61 | // check membership 62 | let mut member = false; 63 | // check the overlapping blooms 2 by 2 64 | for x in 0..num_inner_filters - 2 { 65 | if self.filters[x].check(key) && self.filters[x + 1].check(key) { 66 | member = true; 67 | break; 68 | } 69 | } 70 | 71 | if !member { 72 | // check last bloom 73 | member = self.filters[num_inner_filters - 1].check(key); 74 | } 75 | if !member { 76 | self.insertion_count += 1; 77 | if self.insertion_count >= self.rotate_at { 78 | self.insertion_count = 0; 79 | // rotate the oldest bloom to the start of the list 80 | // and clear it 81 | self.filters.rotate_right(1); 82 | self.filters[0].clear(); 83 | } 84 | // set in the future and current 85 | self.filters[0].set(key); 86 | self.filters[1].set(key); 87 | } 88 | 89 | member 90 | } 91 | 92 | pub fn check(&self, key: &[u8]) -> bool { 93 | let num_inner_filters = self.filters.len(); 94 | // check the overlapping blooms 2 by 2 95 | for x in 0..num_inner_filters - 2 { 96 | if self.filters[x].check(&key) && self.filters[x + 1].check(&key) { 97 | return true; 98 | } 99 | } 100 | // check last bloom 101 | self.filters[num_inner_filters - 1].check(&key) 102 | } 103 | 104 | pub fn clear(&mut self) { 105 | let num_inner_filters = self.filters.len(); 106 | for x in 0..num_inner_filters - 2 { 107 | self.filters[x].clear() 108 | } 109 | } 110 | 111 | pub fn serialize(&self) -> Result> { 112 | let mut opts = FilterOptions::default(); 113 | opts.filter_type = Some(FilterType::Forgetful); 114 | opts.items_count = 
Some(self.items_count); 115 | opts.capacity = Some(self.capacity); 116 | opts.rotate_at = Some(self.rotate_at); 117 | 118 | let mut serialized_filters: Vec = vec![]; 119 | for i in 0..self.capacity { 120 | let filter = &self.filters[i]; 121 | let sips = filter.sip_keys(); 122 | let bitmap = filter.bitmap(); 123 | serialized_filters.push(RawSerializedFilter { 124 | payload: bitmap, 125 | num_bits: filter.number_of_bits(), 126 | num_funs: filter.number_of_hash_functions(), 127 | sip00: sips[0].0, 128 | sip01: sips[0].1, 129 | sip10: sips[1].0, 130 | sip11: sips[1].1, 131 | }) 132 | } 133 | bincode::serialize(&SerializedFilter { 134 | filters: serialized_filters, 135 | opts, 136 | upsert_num: self.insertion_count as u64, 137 | }) 138 | .map_err(|e| format!("bincode serialization failed with: {}", e)) 139 | } 140 | 141 | pub fn restore(prev_filter: SerializedFilter) -> ForgetfulFilter { 142 | let mut filters: Vec> = vec![]; 143 | 144 | for i in 0..prev_filter.opts.capacity.unwrap() { 145 | let pf = &prev_filter.filters.to_vec()[i]; 146 | filters.push(Bloom::from_existing( 147 | &pf.payload, 148 | pf.num_bits, 149 | pf.num_funs, 150 | [(pf.sip00, pf.sip01), (pf.sip10, pf.sip11)], 151 | )) 152 | } 153 | 154 | ForgetfulFilter { 155 | filters, 156 | items_count: prev_filter.opts.items_count.unwrap(), 157 | capacity: prev_filter.opts.capacity.unwrap(), 158 | rotate_at: prev_filter.opts.rotate_at.unwrap(), 159 | insertion_count: prev_filter.upsert_num as usize, 160 | } 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /src/bloom.erl: -------------------------------------------------------------------------------- 1 | -module(bloom). 2 | 3 | -export([ 4 | new/2, 5 | new_optimal/2, 6 | new_forgetful/4, 7 | new_forgetful_optimal/4, 8 | set/2, 9 | check/2, 10 | check_and_set/2, 11 | clear/1, 12 | type/1, 13 | serialize/1, 14 | deserialize/1 15 | ]). 
%% ----------------------------------------------------------------------------
%% @doc
%% Create a new bloom filter structure.
%% `BitmapSize' is the size in bytes (not bits) that will be allocated in memory
%% `ItemsCount' is an estimation of the maximum number of items to store.
%% @end
%% ----------------------------------------------------------------------------
-spec new(BitmapSize :: pos_integer(), ItemsCount :: pos_integer()) ->
    {ok, Bloom :: bloom_nif:bloom()} | {error, Reason :: binary()}.
new(BitmapSize, ItemsCount) ->
    bloom_nif:new(#{
        filter_type => bloom,
        bitmap_size => BitmapSize,
        items_count => ItemsCount
    }).

%% ----------------------------------------------------------------------------
%% @doc
%% Create a new bloom filter structure.
%% `ItemsCount' is an estimation of the maximum number of items to store.
%% `FalsePositiveRate' is the wanted rate of false positives, strictly between
%% 0.0 and 1.0 (neither bound is a usable rate for sizing a filter).
%% @end
%% ----------------------------------------------------------------------------
-spec new_optimal(ItemsCount :: pos_integer(), FalsePositiveRate :: float()) ->
    {ok, Bloom :: bloom_nif:bloom()} | {error, Reason :: binary()}.
new_optimal(ItemsCount, FalsePositiveRate) when FalsePositiveRate > 0.0 andalso FalsePositiveRate < 1.0 ->
    bloom_nif:new(#{
        filter_type => bloom,
        items_count => ItemsCount,
        fp_rate => FalsePositiveRate
    }).

%% ----------------------------------------------------------------------------
%% @doc
%% Create a new forgetful bloom filter structure.
%% `BitmapSize' is the size in bytes (not bits) that will be allocated in memory
%% `ItemsCount' is an estimation of the maximum number of items to store,
%% `NumFilters' is the number of filters to maintain (minimum of 3) and
%% `RotateAfter' is how many insertions to do into a filter before rotating a blank filter into the `future' position.
%% @end
%% ----------------------------------------------------------------------------
-spec new_forgetful(BitmapSize :: pos_integer(), ItemsCount :: pos_integer(), NumFilters :: pos_integer(), RotateAfter :: pos_integer())
        -> {ok, Bloom :: bloom_nif:bloom()} | {error, Reason :: binary()}.
new_forgetful(BitmapSize, ItemsCount, NumFilters, RotateAfter) when NumFilters > 2 ->
    bloom_nif:new(#{
        filter_type => fbf,
        bitmap_size => BitmapSize,
        items_count => ItemsCount,
        capacity => NumFilters,
        rotate_at => RotateAfter
    }).

%% ----------------------------------------------------------------------------
%% @doc
%% Create a new forgetful bloom filter structure sized from a false positive rate.
%% `ItemsCount' is an estimation of the maximum number of items to store,
%% `NumFilters' is the number of filters to maintain (minimum of 3) and
%% `RotateAfter' is how many insertions to do into a filter before rotating a blank filter into the `future' position.
%% `FalsePositiveRate' is the wanted rate of false positives, strictly between
%% 0.0 and 1.0.
%% @end
%% ----------------------------------------------------------------------------
-spec new_forgetful_optimal(ItemsCount :: pos_integer(), NumFilters :: pos_integer(), RotateAfter :: pos_integer(), FalsePositiveRate :: float())
        -> {ok, Bloom :: bloom_nif:bloom()} | {error, Reason :: binary()}.
new_forgetful_optimal(ItemsCount, NumFilters, RotateAfter, FalsePositiveRate) when NumFilters > 2 andalso FalsePositiveRate > 0.0 andalso FalsePositiveRate < 1.0 ->
    bloom_nif:new(#{
        filter_type => fbf,
        items_count => ItemsCount,
        capacity => NumFilters,
        rotate_at => RotateAfter,
        fp_rate => FalsePositiveRate
    }).

%% ----------------------------------------------------------------------------
%% @doc
%% Record the presence of `Key' in `Bloom' and `ForgetfulBloom'
%% For `ForgetfulBloom' a boolean is returned to indicate if the value was already present (like `check_and_set/2').
%% @end
%% ----------------------------------------------------------------------------
-spec set(Bloom :: bloom_nif:bloom(), Key :: term()) -> ok | boolean().
set(Bloom, Key) ->
    bloom_nif:set(Bloom, Key).

%% ----------------------------------------------------------------------------
%% @doc
%% Check for the presence of `Key' in `Bloom'.
%% Serialized and binary encoded bloom filters can be used with this
%% function when you wish to check for the key and do not need to use set
%% (eg. a static bloom filter stored in a database).
%% @end
%% ----------------------------------------------------------------------------
-spec check(Bloom :: bloom_nif:bloom() | bloom_nif:serialized_bloom(), Key :: term()) -> boolean().
check(Bloom, Key) ->
    bloom_nif:check(Bloom, Key).

%% ----------------------------------------------------------------------------
%% @doc
%% Record the presence of `Key' in `Bloom' or `ForgetfulBloom'
%% and return whether it was present before.
%% @end
%% ----------------------------------------------------------------------------
-spec check_and_set(Bloom :: bloom_nif:bloom(), Key :: term()) -> boolean().
check_and_set(Bloom, Key) ->
    bloom_nif:check_and_set(Bloom, Key).

%% ----------------------------------------------------------------------------
%% @doc
%% Clear all of the bits in the filter, removing all keys from the set.
%% @end
%% ----------------------------------------------------------------------------
-spec clear(Bloom :: bloom_nif:bloom()) -> ok.
clear(Bloom) ->
    bloom_nif:clear(Bloom).

%% ----------------------------------------------------------------------------
%% @doc
%% Get type of filter.
%% @end
%% ----------------------------------------------------------------------------
-spec type(Bloom :: bloom_nif:bloom()) -> number() | {error, Reason :: binary()}.
type(Bloom) ->
    bloom_nif:ftype(Bloom).

%% ----------------------------------------------------------------------------
%% @doc
%% Serialize a bloom filter to binary.
%% `check/2' can be used against this serialized form efficiently.
%% @end
%% ----------------------------------------------------------------------------
-spec serialize(Bloom :: bloom_nif:bloom()) -> {ok, bloom_nif:serialized_bloom()}.
serialize(Bloom) ->
    bloom_nif:serialize(Bloom).

%% ----------------------------------------------------------------------------
%% @doc
%% Deserialize a previously serialized bloom filter back
%% into a bloom filter reference.
%% @end
%% ----------------------------------------------------------------------------
-spec deserialize(SerializedBloom :: bloom_nif:serialized_bloom()) -> {ok, bloom_nif:bloom()}.
deserialize(SerializedBloom) ->
    bloom_nif:deserialize(SerializedBloom).
-------------------------------------------------------------------------------- /crates/bloom/src/nif.rs: -------------------------------------------------------------------------------- 1 | use std::io::Write; 2 | use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; 3 | 4 | use rustler::resource::ResourceArc; 5 | use rustler::{Binary, Encoder, Env, NifResult, OwnedBinary, Term}; 6 | 7 | use atoms::{bindecode, binencode, error, ok, wrong_filter_type}; 8 | use container::SerializedFilter; 9 | use filter::{BloomFilter, Filter, FilterType}; 10 | use options::FilterOptions; 11 | 12 | // ================================================================================================= 13 | // resource 14 | // ================================================================================================= 15 | 16 | #[repr(transparent)] 17 | struct FilterResource(RwLock); 18 | 19 | impl FilterResource { 20 | fn read(&self) -> RwLockReadGuard<'_, Filter> { 21 | self.0.read().unwrap() 22 | } 23 | 24 | fn write(&self) -> RwLockWriteGuard<'_, Filter> { 25 | self.0.write().unwrap() 26 | } 27 | } 28 | 29 | impl From for FilterResource { 30 | fn from(other: Filter) -> Self { 31 | FilterResource(RwLock::new(other)) 32 | } 33 | } 34 | 35 | pub fn on_load(env: Env, _load_info: Term) -> bool { 36 | rustler::resource!(FilterResource, env); 37 | true 38 | } 39 | 40 | // ================================================================================================= 41 | // api 42 | // ================================================================================================= 43 | 44 | #[rustler::nif] 45 | fn new<'a>(env: Env<'a>, opts: FilterOptions) -> NifResult> { 46 | let filt = Filter::new(opts).map_err(|e| rustler::error::Error::Term(Box::new(e)))?; 47 | Ok((ok(), ResourceArc::new(FilterResource::from(filt))).encode(env)) 48 | } 49 | 50 | #[rustler::nif] 51 | fn ftype<'a>(env: Env<'a>, resource: ResourceArc) -> NifResult> { 52 | let filt_guard = resource.read(); 53 
| Ok((filt_guard.filter_type() as u32).encode(env)) 54 | } 55 | 56 | #[rustler::nif(name = "serialize", schedule = "DirtyIo")] 57 | fn serialize<'a>(env: Env<'a>, resource: ResourceArc) -> NifResult> { 58 | let serialized = resource.read().serialize(); 59 | match serialized { 60 | Ok(bin_vec) => { 61 | let mut binary = OwnedBinary::new(bin_vec.len()).unwrap(); 62 | binary.as_mut_slice().write_all(&bin_vec).unwrap(); 63 | Ok((ok(), Binary::from_owned(binary, env)).encode(env)) 64 | } 65 | Err(_e) => Ok((error(), binencode()).encode(env)), 66 | } 67 | } 68 | 69 | #[rustler::nif(name = "deserialize", schedule = "DirtyIo")] 70 | fn deserialize<'a>(env: Env<'a>, serialized: LazyBinary<'a>) -> NifResult> { 71 | match bincode::deserialize::(&serialized) { 72 | Ok(f) => Ok(( 73 | ok(), 74 | ResourceArc::new(FilterResource::from(Filter::restore(f).unwrap())), 75 | ) 76 | .encode(env)), 77 | Err(_e) => Ok((error(), bindecode()).encode(env)), 78 | } 79 | } 80 | 81 | #[rustler::nif] 82 | fn set<'a>(env: Env<'a>, resource: ResourceArc, key: LazyBinary<'a>) -> NifResult> { 83 | let mut filt_guard = resource.write(); 84 | match &mut *filt_guard { 85 | Filter::Forgetful(filt) => { 86 | let member = filt.set(&key); 87 | Ok(member.encode(env)) 88 | } 89 | Filter::Bloom(filt) => { 90 | filt.set(&key); 91 | Ok(ok().encode(env)) 92 | } 93 | } 94 | } 95 | 96 | #[rustler::nif] 97 | fn vcheck<'a>(env: Env<'a>, resource: ResourceArc, key: LazyBinary<'a>) -> NifResult> { 98 | let filt_guard = resource.read(); 99 | Ok(filt_guard.check(&key).encode(env)) 100 | } 101 | 102 | #[rustler::nif] 103 | fn check_and_set<'a>( 104 | env: Env<'a>, 105 | resource: ResourceArc, 106 | key: LazyBinary<'a>, 107 | ) -> NifResult> { 108 | let mut filt_guard = resource.write(); 109 | match &mut *filt_guard { 110 | Filter::Bloom(filter) => Ok(filter.check_and_set(&key).encode(env)), 111 | _ => Ok((error(), wrong_filter_type()).encode(env)), 112 | } 113 | } 114 | 115 | #[rustler::nif] 116 | fn clear<'a>(env: 
Env<'a>, resource: ResourceArc) -> NifResult> { 117 | resource.write().clear(); 118 | Ok(ok().encode(env)) 119 | } 120 | 121 | // check a serialized bloom for key membership without fully deserializing the bloom 122 | // specifically we want to avoid the very slow bitvec deserialization and simply compute 123 | // the hash keys manually and check them inside the Erlang binary by hand 124 | // for a 50mb bloom, this improves checking a serialized bloom from 25 seconds to 35 microseconds 125 | #[rustler::nif] 126 | fn check_serialized<'a>(env: Env<'a>, serialized: LazyBinary<'a>, key: LazyBinary<'a>) -> NifResult> { 127 | match bincode::deserialize::(&serialized) { 128 | Ok(f) => match f.opts.filter_type { 129 | Some(FilterType::Bloom) => { 130 | // TODO: The following block contains values 131 | // originally filled in by 132 | // `FilterOptions::default()`. Are these values 133 | // meaningful in any away? Is there a more elegent 134 | // solution to keep this `check_serialized()` working 135 | // correctly? 136 | // 137 | let opts = { 138 | let mut opts = FilterOptions::default(); 139 | opts.filter_type = Some(FilterType::Bloom); 140 | opts.items_count = Some(100); 141 | opts.bitmap_size = Some(10); 142 | opts 143 | }; 144 | let filter = BloomFilter::new(opts).unwrap(); 145 | Ok((filter.check_serialized(f, &key)).encode(env)) 146 | } 147 | _ => Ok((error(), wrong_filter_type()).encode(env)), 148 | }, 149 | Err(_e) => Ok((error(), bindecode()).encode(env)), 150 | } 151 | } 152 | 153 | 154 | // ================================================================================================= 155 | // helpers 156 | // ================================================================================================= 157 | 158 | /// Represents either a borrowed `Binary` or `OwnedBinary`. 159 | /// 160 | /// `LazyBinary` allows for the most efficient conversion from an 161 | /// Erlang term to a byte slice. 
If the term is an actual Erlang 162 | /// binary, constructing `LazyBinary` is essentially 163 | /// zero-cost. However, if the term is any other Erlang type, it is 164 | /// converted to an `OwnedBinary`, which requires a heap allocation. 165 | enum LazyBinary<'a> { 166 | Owned(OwnedBinary), 167 | Borrowed(Binary<'a>), 168 | } 169 | 170 | impl<'a> std::ops::Deref for LazyBinary<'a> { 171 | type Target = [u8]; 172 | fn deref(&self) -> &[u8] { 173 | match self { 174 | Self::Owned(owned) => owned.as_ref(), 175 | Self::Borrowed(borrowed) => borrowed.as_ref(), 176 | } 177 | } 178 | } 179 | 180 | impl<'a> rustler::Decoder<'a> for LazyBinary<'a> { 181 | fn decode(term: Term<'a>) -> NifResult { 182 | if term.is_binary() { 183 | Ok(Self::Borrowed(Binary::from_term(term)?)) 184 | } else { 185 | Ok(Self::Owned(term.to_binary())) 186 | } 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /crates/bloom/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | [[package]] 4 | name = "bincode" 5 | version = "1.2.1" 6 | source = "registry+https://github.com/rust-lang/crates.io-index" 7 | dependencies = [ 8 | "byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", 9 | "serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", 10 | ] 11 | 12 | [[package]] 13 | name = "bit-vec" 14 | version = "0.6.1" 15 | source = "registry+https://github.com/rust-lang/crates.io-index" 16 | 17 | [[package]] 18 | name = "bloom" 19 | version = "0.2.0" 20 | dependencies = [ 21 | "bincode 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", 22 | "bloomfilter 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", 23 | "rustler 0.22.0-rc.0 (registry+https://github.com/rust-lang/crates.io-index)", 24 | "serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", 25 | "siphasher 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", 26 | ] 27 | 28 | [[package]] 29 | name = "bloomfilter" 30 | version = "1.0.2" 31 | source = "registry+https://github.com/rust-lang/crates.io-index" 32 | dependencies = [ 33 | "bit-vec 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", 34 | "rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)", 35 | "siphasher 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", 36 | ] 37 | 38 | [[package]] 39 | name = "byteorder" 40 | version = "1.3.4" 41 | source = "registry+https://github.com/rust-lang/crates.io-index" 42 | 43 | [[package]] 44 | name = "c2-chacha" 45 | version = "0.2.3" 46 | source = "registry+https://github.com/rust-lang/crates.io-index" 47 | dependencies = [ 48 | "ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", 49 | ] 50 | 51 | [[package]] 52 | name = "cfg-if" 53 | version = "0.1.10" 54 | source = "registry+https://github.com/rust-lang/crates.io-index" 55 | 56 | [[package]] 57 | name = "getrandom" 58 | version = "0.1.14" 59 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 60 | dependencies = [ 61 | "cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", 62 | "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", 63 | "wasi 0.9.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)", 64 | ] 65 | 66 | [[package]] 67 | name = "heck" 68 | version = "0.3.1" 69 | source = "registry+https://github.com/rust-lang/crates.io-index" 70 | dependencies = [ 71 | "unicode-segmentation 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)", 72 | ] 73 | 74 | [[package]] 75 | name = "lazy_static" 76 | version = "1.4.0" 77 | source = "registry+https://github.com/rust-lang/crates.io-index" 78 | 79 | [[package]] 80 | name = "libc" 81 | version = "0.2.66" 82 | source = "registry+https://github.com/rust-lang/crates.io-index" 83 | 84 | [[package]] 85 | name = "ppv-lite86" 86 | version = "0.2.6" 87 | source = "registry+https://github.com/rust-lang/crates.io-index" 88 | 89 | [[package]] 90 | name = "proc-macro2" 91 | version = "1.0.13" 92 | source = "registry+https://github.com/rust-lang/crates.io-index" 93 | dependencies = [ 94 | "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", 95 | ] 96 | 97 | [[package]] 98 | name = "quote" 99 | version = "1.0.6" 100 | source = "registry+https://github.com/rust-lang/crates.io-index" 101 | dependencies = [ 102 | "proc-macro2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)", 103 | ] 104 | 105 | [[package]] 106 | name = "rand" 107 | version = "0.7.3" 108 | source = "registry+https://github.com/rust-lang/crates.io-index" 109 | dependencies = [ 110 | "getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)", 111 | "libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)", 112 | "rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", 113 | "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", 114 
| "rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", 115 | ] 116 | 117 | [[package]] 118 | name = "rand_chacha" 119 | version = "0.2.1" 120 | source = "registry+https://github.com/rust-lang/crates.io-index" 121 | dependencies = [ 122 | "c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", 123 | "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", 124 | ] 125 | 126 | [[package]] 127 | name = "rand_core" 128 | version = "0.5.1" 129 | source = "registry+https://github.com/rust-lang/crates.io-index" 130 | dependencies = [ 131 | "getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)", 132 | ] 133 | 134 | [[package]] 135 | name = "rand_hc" 136 | version = "0.2.0" 137 | source = "registry+https://github.com/rust-lang/crates.io-index" 138 | dependencies = [ 139 | "rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", 140 | ] 141 | 142 | [[package]] 143 | name = "rustler" 144 | version = "0.22.0-rc.0" 145 | source = "registry+https://github.com/rust-lang/crates.io-index" 146 | dependencies = [ 147 | "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", 148 | "rustler_codegen 0.22.0-rc.0 (registry+https://github.com/rust-lang/crates.io-index)", 149 | "rustler_sys 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", 150 | ] 151 | 152 | [[package]] 153 | name = "rustler_codegen" 154 | version = "0.22.0-rc.0" 155 | source = "registry+https://github.com/rust-lang/crates.io-index" 156 | dependencies = [ 157 | "heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", 158 | "proc-macro2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)", 159 | "quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", 160 | "syn 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)", 161 | ] 162 | 163 | [[package]] 164 | name = "rustler_sys" 165 | version = "2.1.0" 166 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 167 | dependencies = [ 168 | "unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", 169 | ] 170 | 171 | [[package]] 172 | name = "serde" 173 | version = "1.0.110" 174 | source = "registry+https://github.com/rust-lang/crates.io-index" 175 | dependencies = [ 176 | "serde_derive 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)", 177 | ] 178 | 179 | [[package]] 180 | name = "serde_derive" 181 | version = "1.0.110" 182 | source = "registry+https://github.com/rust-lang/crates.io-index" 183 | dependencies = [ 184 | "proc-macro2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)", 185 | "quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", 186 | "syn 1.0.22 (registry+https://github.com/rust-lang/crates.io-index)", 187 | ] 188 | 189 | [[package]] 190 | name = "siphasher" 191 | version = "0.3.3" 192 | source = "registry+https://github.com/rust-lang/crates.io-index" 193 | 194 | [[package]] 195 | name = "syn" 196 | version = "1.0.22" 197 | source = "registry+https://github.com/rust-lang/crates.io-index" 198 | dependencies = [ 199 | "proc-macro2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)", 200 | "quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)", 201 | "unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", 202 | ] 203 | 204 | [[package]] 205 | name = "unicode-segmentation" 206 | version = "1.6.0" 207 | source = "registry+https://github.com/rust-lang/crates.io-index" 208 | 209 | [[package]] 210 | name = "unicode-xid" 211 | version = "0.2.0" 212 | source = "registry+https://github.com/rust-lang/crates.io-index" 213 | 214 | [[package]] 215 | name = "unreachable" 216 | version = "1.0.0" 217 | source = "registry+https://github.com/rust-lang/crates.io-index" 218 | dependencies = [ 219 | "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", 220 | ] 221 | 222 | [[package]] 223 | name = 
"void" 224 | version = "1.0.2" 225 | source = "registry+https://github.com/rust-lang/crates.io-index" 226 | 227 | [[package]] 228 | name = "wasi" 229 | version = "0.9.0+wasi-snapshot-preview1" 230 | source = "registry+https://github.com/rust-lang/crates.io-index" 231 | 232 | [metadata] 233 | "checksum bincode 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5753e2a71534719bf3f4e57006c3a4f0d2c672a4b676eec84161f763eca87dbf" 234 | "checksum bit-vec 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a4523a10839ffae575fb08aa3423026c8cb4687eef43952afb956229d4f246f7" 235 | "checksum bloomfilter 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "b14238e7ff7b94d429366d4eecd668fd8fe81ffac7f334720d0edbdd87408428" 236 | "checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" 237 | "checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb" 238 | "checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" 239 | "checksum getrandom 0.1.14 (registry+https://github.com/rust-lang/crates.io-index)" = "7abc8dd8451921606d809ba32e95b6111925cd2906060d2dcc29c070220503eb" 240 | "checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" 241 | "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" 242 | "checksum libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)" = "d515b1f41455adea1313a4a2ac8a8a477634fbae63cc6100e3aebb207ce61558" 243 | "checksum ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = 
"74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b" 244 | "checksum proc-macro2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)" = "53f5ffe53a6b28e37c9c1ce74893477864d64f74778a93a4beb43c8fa167f639" 245 | "checksum quote 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "54a21852a652ad6f610c9510194f398ff6f8692e334fd1145fed931f7fbe44ea" 246 | "checksum rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" 247 | "checksum rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "03a2a90da8c7523f554344f921aa97283eadf6ac484a6d2a7d0212fa7f8d6853" 248 | "checksum rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" 249 | "checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" 250 | "checksum rustler 0.22.0-rc.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6627953c4983f7808f569d4df17d7de55893ed1e1a85ca4ef5ab95569b83832e" 251 | "checksum rustler_codegen 0.22.0-rc.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0b10e7791e788906c4394c980022210fbbeb75e75f7d9166b7bd0169e194ed4d" 252 | "checksum rustler_sys 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9fb96034ff33723615fd19223d58c987c1f6476342e83557a6e467ef95f83bda" 253 | "checksum serde 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)" = "99e7b308464d16b56eba9964e4972a3eee817760ab60d88c3f86e1fecb08204c" 254 | "checksum serde_derive 1.0.110 (registry+https://github.com/rust-lang/crates.io-index)" = "818fbf6bfa9a42d3bfcaca148547aa00c7b915bec71d1757aa2d44ca68771984" 255 | "checksum siphasher 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7" 256 | "checksum syn 
1.0.22 (registry+https://github.com/rust-lang/crates.io-index)" = "1425de3c33b0941002740a420b1a906a350b88d08b82b2c8a01035a3f9447bac" 257 | "checksum unicode-segmentation 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0" 258 | "checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" 259 | "checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" 260 | "checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" 261 | "checksum wasi 0.9.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)" = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" 262 | --------------------------------------------------------------------------------