├── .gitignore ├── package.lisp ├── cl-bloom.asd ├── LICENSE.txt ├── README.md └── cl-bloom.lisp /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.fasl 3 | -------------------------------------------------------------------------------- /package.lisp: -------------------------------------------------------------------------------- 1 | ;;;; package.lisp 2 | 3 | (defpackage #:cl-bloom 4 | (:use #:cl #:cl-murmurhash) 5 | (:nicknames :bloom) 6 | (:export 7 | :*false-drop-rate* 8 | :bloom-filter-p 9 | :make-filter 10 | :destroy-filter 11 | :with-filter 12 | :make-set-filter 13 | :add 14 | :memberp 15 | :copy-filter 16 | :make-compatible-filter 17 | :filter-union 18 | :filter-nunion 19 | :filter-ior 20 | :filter-intersection 21 | :filter-nintersection 22 | :filter-and)) 23 | -------------------------------------------------------------------------------- /cl-bloom.asd: -------------------------------------------------------------------------------- 1 | ;;;; cl-bloom.asd 2 | 3 | #+(or sbcl ccl cmucl ecl lispworks allegro) 4 | (eval-when (:compile-toplevel :load-toplevel :execute) 5 | (pushnew :cl-bloom-sv *features*)) 6 | 7 | (asdf:defsystem #:cl-bloom 8 | :author "Paul M. Rodriguez " 9 | :description "Simple Bloom filters with efficient hashing." 10 | :license "MIT" 11 | :serial t 12 | :depends-on (#:cl-murmurhash 13 | #+cl-bloom-sv 14 | #:static-vectors) 15 | :components ((:file "package") 16 | (:file "cl-bloom"))) 17 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 Paul M. Rodriguez 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | A simple Common Lisp implementation of Bloom filters with efficient hashing. 2 | 3 | To make an empty filter, use MAKE-FILTER, which takes parameters for capacity and false drop (false positive) rate. If no false drop rate 4 | is specified, the value of *FALSE-DROP-RATE* is used. And also the third parameter specify whether the space is allocated from heap or by using `static-vectors:make-static-vector`. 5 | 6 | ```common-lisp 7 | ;;; the space is allocated from heap by default 8 | (defparameter *my-filter* 9 | (bloom:make-filter :capacity 1000 :false-drop-rate 1/1000)) 10 | ;;; by setting the third parameter `:static` to T, 11 | ;;; the space will be allocated statically 12 | (defparameter *my-filter* 13 | (bloom:make-filter :capacity 1000 :false-drop-rate 1/1000 :static t)) 14 | ``` 15 | 16 | Good values for the "order" (size) and "degree" (number of hashes) are 17 | calculated internally to obtain the theoretically ideal dimensions for 18 | a Bloom filter having the given parameters. 19 | 20 | To add an element to a filter, use **ADD**: 21 | 22 | ```common-lisp 23 | (bloom:add *my-filter* "Add me") 24 | ``` 25 | 26 | To test for membership, use **MEMBERP**: 27 | 28 | ```common-lisp 29 | (bloom:memberp *my-filter* "Add me") 30 | => T 31 | ``` 32 | 33 | Since when the space is allocated by using `static-vectors:make-static-vector`, users must explicitly free the space by using `static-vectors:free-static-vector`. We thus provide two APIs, `destroy-filter` and `with-filter`, to help with that: 34 | 35 | ### destroy-filter 36 | ```common-lisp 37 | (bloom:destroy-filter *filter*) 38 | ;; => a destroyed filter instance, where all slots are being either set to NIL or freed 39 | ``` 40 | 41 | ### with-filter 42 | ```common-lisp 43 | ;;; A 'with-' wrapper around filter, pretty useful when the space is allocated statically; 44 | ;;; it will free the space 'automatically'. 45 | CL-USER> (bloom:with-filter (filter :capacity 10 :static t) 46 | (bloom:add filter "add") 47 | (bloom:add filter "minus") 48 | (print (bloom:memberp filter "add")) 49 | (print (bloom:memberp filter "minus"))) 50 | 51 | T 52 | T 53 | ; No value 54 | ``` 55 | 56 | When filters are used as sets, `FILTER-UNION`, `FILTER-NUNION`, `FILTER-NINTERSECTION`, and `FILTER-INTERSECTION` behave like their namesakes. `FILTER-IOR` and `FILTER-AND` are shorthands for lists of filters. 57 | 58 | The other utilities for composing filters are `MAKE-COMPATIBLE-FILTER`, which takes a filter and returns an empty, compatible filter, and `COPY-FILTER`, which takes a filter and returns an independent copy. 59 | 60 | The utility `MAKE-SET-FILTER` covers one use case for Bloom filters: given a list, it returns a filter suitable for testing membership in 61 | that list, considered as a set. 62 | -------------------------------------------------------------------------------- /cl-bloom.lisp: -------------------------------------------------------------------------------- 1 | ;;;; cl-bloom.lisp 2 | 3 | (in-package #:cl-bloom) 4 | 5 | ;;; "cl-bloom" goes here. Hacks and glory await! 6 | 7 | (defparameter *false-drop-rate* 1/1000 8 | "Acceptable rate of false drops.") 9 | 10 | (defun opt-degree () 11 | (ceiling (log *false-drop-rate* 1/2))) 12 | 13 | (defun opt-order (capacity) 14 | (ceiling (* (log (/ 1 *false-drop-rate*)) 15 | #.(/ (expt (log 2) 2)) 16 | capacity))) 17 | 18 | (defun make-bit-vector (size &key (allocation :heap)) 19 | (assert (member allocation '(nil :heap :static)) nil 20 | (error "The `allocation` should be either :heap or :static, or it's set to :heap by default.")) 21 | (let ((params `(,size :element-type bit :initial-element 0))) 22 | (apply (case allocation 23 | ((nil :heap) 'make-array) 24 | (:static 25 | (or #+cl-bloom-sv 'static-vectors:make-static-vector 26 | 'make-array))) 27 | params))) 28 | 29 | (defclass bloom-filter () 30 | ((array :accessor filter-array :initarg :array :type simple-bit-vector) 31 | (%array-static-p% :accessor filter-array-static-p :initarg :array-static-p :type symbol) 32 | (order :accessor filter-order :initarg :order :type integer) 33 | (degree :accessor filter-degree :initarg :degree :type integer) 34 | (seed :accessor filter-seed :initarg :seed :type integer 35 | :documentation "Cache the value of MURMURHASH:*DEFAULT-SEED* 36 | at the time the filter was created, lest changing the default 37 | seed invalidate the filter.")) 38 | (:default-initargs 39 | :degree (opt-degree) 40 | :order 256 41 | :seed *default-seed*)) 42 | 43 | (defmethod initialize-instance :after ((filter bloom-filter) &key order static) 44 | (setf (slot-value filter 'array) 45 | (make-bit-vector order :allocation (if static :static :heap)) 46 | (slot-value filter '%array-static-p%) 47 | static)) 48 | 49 | (defun bloom-filter-p (object) 50 | (typep object 'bloom-filter)) 51 | 52 | (defun make-filter (&key (capacity 256) (false-drop-rate *false-drop-rate*) (static nil)) 53 | "Return a Bloom filter long enough to hold CAPACITY entries with the 54 | specified FALSE-DROP-RATE." 55 | (assert (< 0 false-drop-rate 1)) 56 | (assert (> capacity 0)) 57 | (let* ((*false-drop-rate* false-drop-rate) 58 | (order (opt-order capacity))) 59 | (make-instance 'bloom-filter :order order :static static))) 60 | 61 | (defun make-set-filter (list &key (static nil)) 62 | "Make a Bloom filter from the elements of LIST, optimizing the order and 63 | degree of the filter according to the size of the set." 64 | (declare (list list)) 65 | (let* ((*default-seed* (make-perfect-seed list)) 66 | (filter (make-filter :capacity (length list) :static static))) 67 | (dolist (element list) 68 | (add filter element)) 69 | filter)) 70 | 71 | ;; Cf. Kirsch and Mitzenmacher, "Less Hashing, Same 72 | ;; Performance: Building a Better Bloom Filter". 73 | ;; 74 | 75 | (declaim (inline fake-hash)) 76 | 77 | (defun fake-hash (hash1 hash2 index order) 78 | (mod (+ hash1 (* index hash2)) order)) 79 | 80 | (defun add (filter element) 81 | "Make FILTER include ELEMENT." 82 | (declare (bloom-filter filter)) 83 | (with-slots (order degree array seed) filter 84 | (let* ((hash1 (murmurhash element :seed seed)) 85 | (hash2 (murmurhash element :seed hash1))) 86 | (loop for i to (1- degree) 87 | for index = (fake-hash hash1 hash2 i order) 88 | do (setf (sbit array index) 1))))) 89 | 90 | (defun memberp (filter element) 91 | "Return NIL if ELEMENT is definitely not present in FILTER. 92 | Return T if it might be present." 93 | (declare (bloom-filter filter)) 94 | (with-slots (order degree array seed) filter 95 | (let* ((hash1 (murmurhash element :seed seed)) 96 | (hash2 (murmurhash element :seed hash1))) 97 | (loop for i to (1- degree) 98 | for index = (fake-hash hash1 hash2 i order) 99 | always (= 1 (sbit array index)))))) 100 | 101 | (defun make-compatible-filter (filter) 102 | "Return a new Bloom filter having the same order, degree, and seed 103 | as FILTER." 104 | (declare (bloom-filter filter)) 105 | (with-slots (order degree seed) filter 106 | (make-instance 'bloom-filter 107 | :order order 108 | :degree degree 109 | :seed seed))) 110 | 111 | (define-condition incompatible-filter (error) 112 | ((filter :initarg :filter :reader filter))) 113 | 114 | (defun compatible? (filter1 filter2) 115 | (declare (bloom-filter filter1 filter2)) 116 | (and (= (filter-order filter1) 117 | (filter-order filter2)) 118 | (= (filter-degree filter1) 119 | (filter-degree filter2)) 120 | (= (filter-seed filter1) 121 | (filter-seed filter2)))) 122 | 123 | (defun filter-nunion (filter1 filter2) 124 | "Return the union of FILTER1 and FILTER2, overwriting FILTER1." 125 | (declare (bloom-filter filter1 filter2)) 126 | (unless (compatible? filter1 filter2) 127 | (error 'incompatible-filter :filter filter2)) 128 | (bit-ior (filter-array filter1) (filter-array filter2) 129 | (filter-array filter1)) 130 | filter1) 131 | 132 | (defun copy-filter (filter) 133 | "Return a new Bloom filter like FILTER." 134 | (declare (bloom-filter filter)) 135 | (filter-nunion 136 | (make-compatible-filter filter) 137 | filter)) 138 | 139 | (defun destroy-filter (filter) 140 | "Destroy a Bloom filter instance. When its bit array is allocated statically, 141 | then free the memory and set the reference of each slot to a default value by its type; 142 | otherwise, set all the references of slots to a default value by its type." 143 | (with-slots (array %array-static-p% order degree seed) filter 144 | (setf order -1 degree -1 seed -1) 145 | #+cl-bloom-sv 146 | (progn 147 | (when %array-static-p% 148 | (static-vectors:free-static-vector array)) 149 | (setf array #* %array-static-p% nil))) 150 | filter) 151 | 152 | (defmacro with-filter 153 | ((var &key (capacity 256) (false-drop-rate *false-drop-rate*) (static nil)) &body body) 154 | "A 'with-' wrapper around filter, pretty useful when the array space is allocated statically." 155 | (let ((filter (make-filter :capacity capacity :false-drop-rate false-drop-rate :static static))) 156 | `(let ((,var ,filter)) 157 | (unwind-protect (progn ,@body (values)) 158 | (destroy-filter ,var))))) 159 | 160 | (defun filter-union (filter1 filter2) 161 | "Return the union of FILTER1 and FILTER2 as a new filter." 162 | (declare (bloom-filter filter1 filter2)) 163 | (unless (compatible? filter1 filter2) 164 | (error 'incompatible-filter :filter filter2)) 165 | (let ((filter3 (make-compatible-filter filter1))) 166 | (bit-ior (filter-array filter1) (filter-array filter2) 167 | (filter-array filter3)) 168 | filter3)) 169 | 170 | (defun filter-ior (&rest filters) 171 | "Return union of all FILTERS as a new filter." 172 | (reduce #'filter-nunion 173 | (cdr filters) 174 | :initial-value (copy-filter (car filters)))) 175 | 176 | (defun filter-nintersection (filter1 filter2) 177 | "Return the intersection of FILTER1 and FILTER2, overwriting FILTER1." 178 | (declare (bloom-filter filter1 filter2)) 179 | (unless (compatible? filter1 filter2) 180 | (error 'incompatible-filter :filter filter2)) 181 | (bit-and (filter-array filter1) (filter-array filter2) 182 | (filter-array filter1)) 183 | filter1) 184 | 185 | (defun filter-and (&rest filters) 186 | "Return intersection of all FILTERS as a new filter." 187 | (reduce #'filter-nintersection 188 | (cdr filters) 189 | :initial-value (copy-filter (car filters)))) 190 | 191 | (defun filter-intersection (filter1 filter2) 192 | "Return the intersection of FILTER1 and FILTER2 as a new filter." 193 | (declare (bloom-filter filter1 filter2)) 194 | (unless (compatible? filter1 filter2) 195 | (error 'incompatible-filter :filter filter2)) 196 | (let ((filter3 (make-compatible-filter filter1))) 197 | (bit-and (filter-array filter1) (filter-array filter2) 198 | (filter-array filter3)) 199 | filter3)) 200 | --------------------------------------------------------------------------------