├── .gitmodules ├── .travis.yml ├── .travis ├── platform.sh ├── setenv_lua.sh └── setup_lua.sh ├── LICENSE ├── README.md ├── bloom_filter.lua └── test └── test.lua /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "luaxxhash"] 2 | path = luaxxhash 3 | url = https://github.com/szensk/luaxxhash.git 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | sudo: false 4 | 5 | env: 6 | global: 7 | - LUAROCKS=2.3.0 8 | matrix: 9 | - LUA=luajit # latest stable version (2.0.4) 10 | - LUA=luajit2.0 # current head of 2.0 branch 11 | - LUA=luajit2.1 # current head of 2.1 branch 12 | 13 | branches: 14 | only: 15 | - master 16 | 17 | before_install: 18 | - source .travis/setenv_lua.sh 19 | - luarocks install lunitx 20 | 21 | script: 22 | - luajit test/test.lua 23 | 24 | notifications: 25 | email: 26 | on_success: change 27 | on_failure: always 28 | -------------------------------------------------------------------------------- /.travis/platform.sh: -------------------------------------------------------------------------------- 1 | if [ -z "${PLATFORM:-}" ]; then 2 | PLATFORM=$TRAVIS_OS_NAME; 3 | fi 4 | 5 | if [ "$PLATFORM" == "osx" ]; then 6 | PLATFORM="macosx"; 7 | fi 8 | 9 | if [ -z "$PLATFORM" ]; then 10 | if [ "$(uname)" == "Linux" ]; then 11 | PLATFORM="linux"; 12 | else 13 | PLATFORM="macosx"; 14 | fi; 15 | fi 16 | -------------------------------------------------------------------------------- /.travis/setenv_lua.sh: -------------------------------------------------------------------------------- 1 | export PATH=${PATH}:$HOME/.lua:$HOME/.local/bin:${TRAVIS_BUILD_DIR}/install/luarocks/bin 2 | bash .travis/setup_lua.sh 3 | eval `$HOME/.lua/luarocks path` 4 | -------------------------------------------------------------------------------- /.travis/setup_lua.sh: 
-------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # A script for setting up environment for travis-ci testing. 4 | # Sets up Lua and Luarocks. 5 | # LUA must be "lua5.1", "lua5.2" or "luajit". 6 | # luajit2.0 - master v2.0 7 | # luajit2.1 - master v2.1 8 | 9 | set -eufo pipefail 10 | 11 | LUAJIT_VERSION="2.0.4" 12 | LUAJIT_BASE="LuaJIT-$LUAJIT_VERSION" 13 | 14 | source .travis/platform.sh 15 | 16 | LUA_HOME_DIR=$TRAVIS_BUILD_DIR/install/lua 17 | 18 | LR_HOME_DIR=$TRAVIS_BUILD_DIR/install/luarocks 19 | 20 | mkdir $HOME/.lua 21 | 22 | LUAJIT="no" 23 | 24 | if [ "$PLATFORM" == "macosx" ]; then 25 | if [ "$LUA" == "luajit" ]; then 26 | LUAJIT="yes"; 27 | fi 28 | if [ "$LUA" == "luajit2.0" ]; then 29 | LUAJIT="yes"; 30 | fi 31 | if [ "$LUA" == "luajit2.1" ]; then 32 | LUAJIT="yes"; 33 | fi; 34 | elif [ "$(expr substr $LUA 1 6)" == "luajit" ]; then 35 | LUAJIT="yes"; 36 | fi 37 | 38 | mkdir -p "$LUA_HOME_DIR" 39 | 40 | if [ "$LUAJIT" == "yes" ]; then 41 | 42 | if [ "$LUA" == "luajit" ]; then 43 | curl --location https://github.com/LuaJIT/LuaJIT/archive/v$LUAJIT_VERSION.tar.gz | tar xz; 44 | else 45 | git clone https://github.com/LuaJIT/LuaJIT.git $LUAJIT_BASE; 46 | fi 47 | 48 | cd $LUAJIT_BASE 49 | 50 | if [ "$LUA" == "luajit2.1" ]; then 51 | git checkout v2.1; 52 | # force the INSTALL_TNAME to be luajit 53 | perl -i -pe 's/INSTALL_TNAME=.+/INSTALL_TNAME= luajit/' Makefile 54 | fi 55 | 56 | make && make install PREFIX="$LUA_HOME_DIR" 57 | 58 | ln -s $LUA_HOME_DIR/bin/luajit $HOME/.lua/luajit 59 | ln -s $LUA_HOME_DIR/bin/luajit $HOME/.lua/lua; 60 | 61 | else 62 | 63 | if [ "$LUA" == "lua5.1" ]; then 64 | curl http://www.lua.org/ftp/lua-5.1.5.tar.gz | tar xz 65 | cd lua-5.1.5; 66 | elif [ "$LUA" == "lua5.2" ]; then 67 | curl http://www.lua.org/ftp/lua-5.2.4.tar.gz | tar xz 68 | cd lua-5.2.4; 69 | elif [ "$LUA" == "lua5.3" ]; then 70 | curl http://www.lua.org/ftp/lua-5.3.2.tar.gz | tar xz 71 | cd lua-5.3.2; 72 | fi 73 
| 74 | # Build Lua without backwards compatibility for testing 75 | perl -i -pe 's/-DLUA_COMPAT_(ALL|5_2)//' src/Makefile 76 | make $PLATFORM 77 | make INSTALL_TOP="$LUA_HOME_DIR" install; 78 | 79 | ln -s $LUA_HOME_DIR/bin/lua $HOME/.lua/lua 80 | ln -s $LUA_HOME_DIR/bin/luac $HOME/.lua/luac; 81 | 82 | fi 83 | 84 | cd $TRAVIS_BUILD_DIR 85 | 86 | lua -v 87 | 88 | LUAROCKS_BASE=luarocks-$LUAROCKS 89 | 90 | curl --location http://luarocks.org/releases/$LUAROCKS_BASE.tar.gz | tar xz 91 | 92 | cd $LUAROCKS_BASE 93 | 94 | if [ "$LUA" == "luajit" ]; then 95 | ./configure --lua-suffix=jit --with-lua-include="$LUA_HOME_DIR/include/luajit-2.0" --prefix="$LR_HOME_DIR"; 96 | elif [ "$LUA" == "luajit2.0" ]; then 97 | ./configure --lua-suffix=jit --with-lua-include="$LUA_HOME_DIR/include/luajit-2.0" --prefix="$LR_HOME_DIR"; 98 | elif [ "$LUA" == "luajit2.1" ]; then 99 | ./configure --lua-suffix=jit --with-lua-include="$LUA_HOME_DIR/include/luajit-2.1" --prefix="$LR_HOME_DIR"; 100 | else 101 | ./configure --with-lua="$LUA_HOME_DIR" --prefix="$LR_HOME_DIR" 102 | fi 103 | 104 | make build && make install 105 | 106 | ln -s $LR_HOME_DIR/bin/luarocks $HOME/.lua/luarocks 107 | 108 | cd $TRAVIS_BUILD_DIR 109 | 110 | luarocks --version 111 | 112 | rm -rf $LUAROCKS_BASE 113 | 114 | if [ "$LUAJIT" == "yes" ]; then 115 | rm -rf $LUAJIT_BASE; 116 | elif [ "$LUA" == "lua5.1" ]; then 117 | rm -rf lua-5.1.5; 118 | elif [ "$LUA" == "lua5.2" ]; then 119 | rm -rf lua-5.2.4; 120 | elif [ "$LUA" == "lua5.3" ]; then 121 | rm -rf lua-5.3.2; 122 | fi 123 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Vit Listik 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including 
without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Lua bloom filter 2 | 3 | ![MIT license](https://img.shields.io/badge/license-MIT-blue.svg) 4 | ![tests](https://api.travis-ci.org/tivvit/pure-lua-bloom-filter.svg?branch=master) 5 | 6 | Pure luajit implementation of [bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) (probabilistic data structure usable for storing many values effectively). 
7 | 8 | - Based on https://github.com/mozilla-services/lua_bloom_filter 9 | - Using https://github.com/szensk/luaxxhash (the reason why this is LuaJIT only) 10 | - Thanks to https://github.com/moteus/lua-travis-example for the Lua Travis example 11 | 12 | ## Install 13 | luaxxhash is provided as a submodule - clone recursively 14 | 15 | `git clone --recursive` 16 | 17 | ## Example 18 | ```lua 19 | local bloom_filter = require "bloom_filter" 20 | -- store 100 items with at most 1% error (false-positive) probability 21 | local bf = bloom_filter.new(100, 0.01) 22 | bf:query("a") -- 0 23 | bf:add("a") -- 1 - it was not present yet 24 | bf:query("a") -- 1 25 | bf:add("a") -- 0 - it was already present 26 | ``` 27 | ## API 28 | ### Create 29 | `BloomFilter.new(count, probability)` 30 | 31 | `BloomFilter.__new(count, probability)` Creates the filter without allocating `data`; you have to provide the data manually 32 | 33 | ### Add 34 | `BloomFilter:add(value)` Returns 0 if the value was already present, otherwise 1 35 | ### Query 36 | `BloomFilter:query(value)` Returns 1 if the value is (probably) present, otherwise 0 37 | ### Clear 38 | `BloomFilter:clear()` Clears all data 39 | ### Store and load 40 | `local bf_store = BloomFilter:store()` Exports the bloom filter as a table with fields (data, items, probability) 41 | 42 | `BloomFilter.load(bf_store)` Loads previously stored data 43 | ## Test 44 | 45 | `luajit test/test.lua` 46 | ## TODO 47 | - [ ] luarocks (any help would be very much appreciated) 48 | 49 | ### Development 50 | 51 | Feel free to contribute with a PR. 52 | 53 | ### Copyright and License 54 | 55 | © 2016 [Vít Listík](http://tivvit.cz) 56 | 57 | Released under [MIT licence](https://github.com/tivvit/pure-lua-bloom-filter/blob/master/LICENSE) 58 | -------------------------------------------------------------------------------- /bloom_filter.lua: -------------------------------------------------------------------------------- 1 | package.path = package.path .. 
';./luaxxhash/?.lua' 2 | local xxh32 = require("luaxxhash") 3 | local bit = require("bit") 4 | local BYTE_s = 8 5 | 6 | -- round half up to the nearest integer; kept local so the module does not leak a global 7 | local function round(num) 8 | return math.floor(num + .5) 9 | end 10 | 11 | BloomFilter = {} 12 | BloomFilter.__index = BloomFilter 13 | 14 | function BloomFilter.__new(items, probability) 15 | assert(type(items) == "number", "items must be number") 16 | assert(type(probability) == "number", "probability must be number") 17 | 18 | assert(items > 0, "items must positive") 19 | assert(probability > 0 and probability < 1, 20 | "probability has to be between 0 and 1") 21 | 22 | local bits = math.ceil(items * math.log(probability) / 23 | math.log(1 / math.pow(2, math.log(2)))); 24 | local hashes = round(math.log(2) * bits / items); 25 | 26 | local bf = {} 27 | setmetatable(bf, BloomFilter) 28 | 29 | bf.items = items 30 | bf.bits = bits 31 | bf.bytes = math.ceil(bits / BYTE_s) + 1 -- only for AS bytes 32 | bf.hashes = hashes 33 | bf.probability = probability 34 | bf.data = nil 35 | 36 | return bf 37 | end 38 | 39 | function BloomFilter.new(items, probability) 40 | local bf = BloomFilter.__new(items, probability) 41 | bf.data = {} 42 | for i = 0, bf.bytes do 43 | bf.data[i] = 0 44 | end 45 | 46 | return bf 47 | end 48 | 49 | function BloomFilter.load(store) 50 | -- TODO check type of data (problems with AS bytes) 51 | assert(type(store.items) == "number", "items must be number") 52 | assert(type(store.probability) == "number", "probability must be number") 53 | local bf = BloomFilter.__new(store.items, store.probability) 54 | bf.data = store.data 55 | return bf 56 | end 57 | 58 | function BloomFilter:add(val) 59 | assert(self.data ~= nil, "BloomFilter wasn't initilized, call new first") 60 | val = tostring(val) 61 | local added = 0 62 | 63 | for i = 0, self.hashes do -- inclusive bounds: i runs 0..hashes, so hashes + 1 probes per value 64 | local b = xxh32(val, i) % self.bits; 65 | -- this is only because AS bytes (it has no 0 index) 66 | local pos = math.floor(b / BYTE_s) + 1 67 | local byte = 
self.data[pos] 68 | local shift = bit.lshift(1, b % BYTE_s) 69 | if not (bit.band(byte, shift) > 0) then 70 | self.data[pos] = bit.bor(byte, shift) 71 | added = 1 72 | end 73 | end 74 | 75 | return added 76 | end 77 | 78 | function BloomFilter:query(val) 79 | assert(self.data ~= nil, "BloomFilter wasn't initilized, call new first") 80 | val = tostring(val) 81 | local found = 1 82 | for i = 0, self.hashes do 83 | local b = xxh32(val, i) % self.bits; 84 | -- this is only because AS bytes (it has no 0 index) 85 | local pos = math.floor(b / BYTE_s) + 1 86 | local byte = self.data[pos] 87 | if not (bit.band(byte, bit.lshift(1, b % BYTE_s)) > 0) then 88 | return 0 89 | end 90 | end 91 | return found 92 | end 93 | 94 | function BloomFilter:clear() 95 | assert(self.data ~= nil, "BloomFilter wasn't initilized, call new first") 96 | for i = 0, self.bytes do -- zero the same 0..bytes slots BloomFilter.new allocates (0..bits would grow data about 8x) 97 | self.data[i] = 0 98 | end 99 | end 100 | 101 | function BloomFilter:store() 102 | assert(self.data ~= nil, "BloomFilter wasn't initilized, call new first") 103 | return { 104 | data = self.data, 105 | items = self.items, 106 | probability = self.probability, 107 | } 108 | end 109 | 110 | return BloomFilter 111 | -------------------------------------------------------------------------------- /test/test.lua: -------------------------------------------------------------------------------- 1 | print("------------------------------------") 2 | print("Lua version: " .. (jit and jit.version or _VERSION)) 3 | print("------------------------------------") 4 | print("") 5 | 6 | local HAS_RUNNER = not not lunit 7 | local lunit = require "lunit" 8 | local TEST_CASE = lunit.TEST_CASE 9 | 10 | local LUA_VER = _VERSION 11 | 12 | local _ENV = TEST_CASE "bloom_filter_test_case" 13 | 14 | BloomFilter = require "bloom_filter" 15 | 16 | -- is the World still sane? 
17 | function test_true() 18 | assert_true(true) 19 | -- don't be so negative 20 | assert_false(false) 21 | end 22 | 23 | function test_new() 24 | assert_function(BloomFilter.new) 25 | local bf = BloomFilter.new(10, 0.1) 26 | -- assert_equal(10, bf.items) 27 | -- assert_equal(0.1, bf.probability) 28 | end 29 | 30 | function test_add() 31 | local bf = BloomFilter.new(10, 0.1) 32 | assert_equal(0, bf:query("a")) 33 | assert_equal(1, bf:add("a")) 34 | assert_equal(1, bf:query("a")) 35 | assert_equal(0, bf:add("a")) 36 | assert_equal(1, bf:query("a")) 37 | end 38 | 39 | function test_clear() 40 | local bf = BloomFilter.new(10, 0.1) 41 | assert_equal(0, bf:query("a")) 42 | bf:add("a") 43 | assert_equal(1, bf:query("a")) 44 | bf:clear() 45 | assert_equal(0, bf:query("a")) 46 | end 47 | 48 | function test_range() 49 | local bf = BloomFilter.new(10, 0.1) 50 | for i = 0, 10 do 51 | assert_equal(0, bf:query(i)) 52 | end 53 | 54 | -- add 6 55 | for i = 0, 5 do 56 | bf:add(i) 57 | end 58 | local found = 0 59 | for i = 0, 10 do 60 | found = found + bf:query(i) 61 | end 62 | assert_equal(6, found) 63 | 64 | -- add all 65 | for i = 0, 10 do 66 | bf:add(i) 67 | end 68 | local found = 0 69 | for i = 0, 10 do 70 | found = found + bf:query(i) 71 | end 72 | assert_equal(11, found) 73 | end 74 | 75 | function test_dream_big() 76 | local bf = BloomFilter.new(10000, 0.01) 77 | for i = 0, 10000 do 78 | assert_equal(0, bf:query(i)) 79 | end 80 | 81 | for i = 0, 10000 do 82 | bf:add(i) 83 | end 84 | local found = 0 85 | for i = 0, 10000 do 86 | found = found + bf:query(i) 87 | end 88 | assert_equal(10001, found) 89 | end 90 | 91 | function test_store() 92 | local bf = BloomFilter.new(10, 0.1) 93 | local store = bf:store() 94 | -- assert_equal(3, table.getn(store)) 95 | assert_equal(7, table.getn(store.data)) 96 | assert_equal(10, store.items) 97 | assert_equal(0.1, store.probability) 98 | 99 | bf:add("a") 100 | local store = bf:store() 101 | local bf_load = BloomFilter.load(store) 102 | -- 
assert_equal(10, bf_load.items) 103 | assert_equal(0.1, bf_load.probability) 104 | assert_equal(0, bf_load:query("b")) 105 | assert_equal(1, bf_load:query("a")) 106 | bf_load:add("b") 107 | assert_equal(1, bf_load:query("b")) 108 | end 109 | 110 | if not HAS_RUNNER then lunit.run() end 111 | --------------------------------------------------------------------------------