├── .stylish-haskell.yaml ├── .travis.yml ├── HLint.hs ├── LICENSE ├── README.md ├── Setup.hs ├── lib └── Data │ └── BitMap │ ├── Roaring.hs │ └── Roaring │ ├── Chunk.hs │ └── Utility.hs ├── rawr.cabal ├── stack.yaml └── test └── properties.hs /.stylish-haskell.yaml: -------------------------------------------------------------------------------- 1 | # stylish-haskell configuration file 2 | # ================================== 3 | 4 | # The stylish-haskell tool is mainly configured by specifying steps. These steps 5 | # are a list, so they have an order, and one specific step may appear more than 6 | # once (if needed). Each file is processed by these steps in the given order. 7 | steps: 8 | # Convert some ASCII sequences to their Unicode equivalents. This is disabled 9 | # by default. 10 | # - unicode_syntax: 11 | # # In order to make this work, we also need to insert the UnicodeSyntax 12 | # # language pragma. If this flag is set to true, we insert it when it's 13 | # # not already present. You may want to disable it if you configure 14 | # # language extensions using some other method than pragmas. Default: 15 | # # true. 16 | # add_language_pragma: true 17 | 18 | # Import cleanup 19 | - imports: 20 | # There are different ways we can align names and lists. 21 | # 22 | # - global: Align the import names and import list throughout the entire 23 | # file. 24 | # 25 | # - group: Only align the imports per group (a group is formed by adjacent 26 | # import lines). 27 | # 28 | # - none: Do not perform any alignment. 29 | # 30 | # Default: global. 31 | align: none 32 | 33 | # Language pragmas 34 | - language_pragmas: 35 | # We can generate different styles of language pragma lists. 36 | # 37 | # - vertical: Vertical-spaced language pragmas, one per line. 38 | # 39 | # - compact: A more compact style. 40 | # 41 | # Default: vertical. 42 | style: vertical 43 | 44 | # stylish-haskell can detect redundancy of some language pragmas. If this 45 | # is set to true, it will remove those redundant pragmas. Default: true. 46 | remove_redundant: true 47 | 48 | # Align the types in record declarations 49 | - records: {} 50 | 51 | # Replace tabs by spaces. This is disabled by default. 52 | - tabs: 53 | # Number of spaces to use for each tab. Default: 8, as specified by the 54 | # Haskell report. 55 | spaces: 4 56 | 57 | # Remove trailing whitespace 58 | - trailing_whitespace: {} 59 | 60 | # A common setting is the number of columns (parts of) code will be wrapped 61 | # to. Different steps take this into account. Default: 80. 62 | columns: 78 63 | 64 | # Sometimes, language extensions are specified in a cabal file or from the 65 | # command line instead of using language pragmas in the file. stylish-haskell 66 | # needs to be aware of these, so it can parse the file correctly. 67 | # 68 | # No language extensions are enabled by default. 69 | # language_extensions: 70 | # - TemplateHaskell 71 | # - QuasiQuotes 72 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: haskell 2 | ghc: 7.8 3 | notifications: 4 | email: 5 | on_success: change 6 | on_failure: change 7 | before_install: 8 | - cabal sandbox init 9 | -------------------------------------------------------------------------------- /HLint.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE PackageImports #-} 2 | {-# LANGUAGE TemplateHaskell #-} 3 | module HLint.HLint where 4 | 5 | import "hint" HLint.Builtin.All 6 | import "hint" HLint.Default 7 | import "hint" HLint.Dollar 8 | import "hint" HLint.Generalise 9 | 10 | ignore "Use if" 11 | ignore "Use liftM" 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Thomas Sutton 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Thomas Sutton nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Rawr 2 | ==== 3 | 4 | [![Build Status][3]][2] 5 | 6 | This is an implementation of the [roaring bitmaps][1] data structure in 7 | Haskell. Roaring bitmaps is a compressed bitmap data structure which offers 8 | better compression and performance than other compressed bitmaps in many 9 | situations. 10 | 11 | [1]: http://www.roaringbitmap.org/ 12 | [2]: https://travis-ci.org/thsutton/rawr 13 | [3]: https://travis-ci.org/thsutton/rawr.svg?branch=master 14 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /lib/Data/BitMap/Roaring.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module: Data.BitMap.Roaring 3 | -- Description: Compressed bitmap data structure with good performance. 4 | -- Copyright: (c) Thomas Sutton 2015 5 | -- License: BSD3 6 | -- Maintainer: me@thomas-sutton.id.au 7 | -- Stability: experimental 8 | -- 9 | -- A compressed bitmaps with good space and time performance. 10 | -- 11 | -- These modules are intended to be imported qualified, to avoid name clashes 12 | -- with Prelude functions, e.g. 13 | -- 14 | -- > import Data.BitMap.Roaring (BitMap) 15 | -- > import qualified Data.BitMap.Roaring as Roaring 16 | -- 17 | -- The implementation paritions values into chunks based on their high 16 bits. 18 | -- Chunks are represented differently based on their density: low-density 19 | -- chunks are stored as packed arrays of the low-order bits while high-density 20 | -- chunks are stored as bit vectors. 21 | -- 22 | -- * Samy Chambi, Daniel Lemire, Owen Kaser, Robert Godin, 23 | -- \"/Better bitmap performance with Roaring bitmaps/\", Software: Practice 24 | -- and Experience (to appear) 25 | -- 26 | module Data.BitMap.Roaring where 27 | 28 | import Data.Monoid 29 | import Data.Vector (Vector) 30 | import qualified Data.Vector as V 31 | import Data.Word 32 | 33 | import Data.BitMap.Roaring.Chunk 34 | import Data.BitMap.Roaring.Utility 35 | 36 | -- | A set of bits. 37 | data BitMap = BitMap (Vector Chunk) 38 | deriving (Show) 39 | 40 | type Key = Word32 41 | 42 | -- * Query 43 | 44 | -- | /O(1)./ Is the set empty? 45 | null :: BitMap -> Bool 46 | null (BitMap v) = V.null v 47 | 48 | -- | Cardinality of the set. 49 | size :: BitMap -> Int 50 | size (BitMap cs) = V.sum $ V.map chunkCardinality cs 51 | 52 | -- | Is the value a member of the set? 53 | member :: Key -> BitMap -> Bool 54 | member k (BitMap cs) = 55 | let (i,b) = splitWord k 56 | in case vLookup (\c -> i == chunkIndex c) cs of 57 | Nothing -> False 58 | Just (_,c) -> chunkGet b c 59 | 60 | -- | Is this a subset? 61 | -- @(s1 `isSubsetOf` s2)@ tells whether @s1@ is a subset of @s2. 62 | isSubsetOf :: BitMap -> BitMap -> Bool 63 | isSubsetOf _ _ = False 64 | 65 | -- | Is this a proper subset? (i.e. a subset but not equal). 66 | isProperSubsetOf :: BitMap -> BitMap -> Bool 67 | isProperSubsetOf a b = a `isSubsetOf` b && not (b `isSubsetOf` a) 68 | 69 | -- | Count the bits set in the range [0,i]. 70 | rank :: BitMap -> Int -> Int 71 | rank (BitMap _cs) _ = 0 72 | 73 | -- | Find the index of the ith set bit. 74 | select :: BitMap -> Int -> Maybe Key 75 | select (BitMap _cs) _ = Nothing 76 | 77 | -- * Construction 78 | 79 | -- | /O(1)./ The empty set. 80 | empty :: BitMap 81 | empty = BitMap mempty 82 | 83 | -- | /O(1)./ A set of one element. 84 | singleton :: Key -> BitMap 85 | singleton k = insert k empty 86 | 87 | -- | Add a value to the set. 88 | insert :: Key -> BitMap -> BitMap 89 | insert k (BitMap v) = 90 | let (i,b) = splitWord k 91 | f = Just . maybe (chunkNew i b) (chunkSet b) 92 | v' = vAlter f (\c -> i == chunkIndex c) v 93 | in BitMap v' 94 | 95 | -- | Delete a value in the set. 96 | -- 97 | -- Returns the original set when the value was not present. 98 | delete :: Key -> BitMap -> BitMap 99 | delete k (BitMap v) = 100 | let (i,b) = splitWord k 101 | v' = vAlter (f b) (\c -> i == chunkIndex c) v 102 | in BitMap v' 103 | where 104 | f _ Nothing = Nothing 105 | f b (Just c) = 106 | let c' = chunkClear b c 107 | in if 0 == chunkCardinality c' 108 | then Nothing 109 | else Just c' 110 | 111 | -- * Combine 112 | 113 | -- | The union of two sets. 114 | union :: BitMap -> BitMap -> BitMap 115 | union (BitMap cs) (BitMap ds) = BitMap $ mergeWith f cs ds 116 | where 117 | f :: Maybe Chunk -> Maybe Chunk -> Maybe Chunk 118 | f Nothing b = b 119 | f a Nothing = a 120 | f (Just a) (Just b) = Just $ mergeChunks a b 121 | 122 | -- | The difference between two sets. 123 | difference :: BitMap -> BitMap -> BitMap 124 | difference _ _ = empty 125 | 126 | -- | The intersection of two sets. 127 | intersection :: BitMap -> BitMap -> BitMap 128 | intersection _ _ = empty 129 | 130 | -- * Conversion 131 | 132 | -- ** List 133 | 134 | elems :: BitMap -> [Key] 135 | elems = toAscList 136 | 137 | toList :: BitMap -> [Key] 138 | toList = toAscList 139 | 140 | fromList :: [Key] -> BitMap 141 | fromList = foldl (flip insert) empty 142 | 143 | -- ** Ordered list 144 | 145 | -- | Produce a list of 'Key's in a 'BitMap', in descending order. 146 | toAscList :: BitMap -> [Key] 147 | toAscList (BitMap cs) = work cs [] 148 | where 149 | work cs' l | V.null cs' = l 150 | | otherwise = let c = chunkToBits $ V.head cs' 151 | cs'' = V.tail cs' 152 | in work cs'' (l <> c) 153 | 154 | -- | Produce a list of 'Key's in a 'BitMap', in descending order. 155 | toDescList :: BitMap -> [Key] 156 | toDescList = reverse . toAscList 157 | 158 | -- | Build a 'BitMap' from a list of 'Key's. 159 | -- 160 | -- Precondition: input is sorted ascending order. 161 | -- 162 | -- TODO(thsutton) Throw error if precondition violated. 163 | -- TODO(thsutton) Implement 164 | fromAscList :: [Key] -> BitMap 165 | fromAscList _ = empty 166 | 167 | -- * Utility 168 | 169 | 170 | -- | Merge two 'Vector's of 'Chunk's. 171 | -- 172 | -- Precondition: Both vectors are sorted by 'chunkIndex'. 173 | -- Postcondition: Output vector sorted by 'chunkIndex'. 174 | -- Postcondition: length(output) >= max(length(a),length(b)) 175 | mergeWith 176 | :: (Maybe Chunk -> Maybe Chunk -> Maybe Chunk) 177 | -- ^ Merge two chunks with the same index. 178 | -> Vector Chunk 179 | -> Vector Chunk 180 | -> Vector Chunk 181 | mergeWith f v1 v2 182 | | V.null v1 = v2 183 | | V.null v2 = v1 184 | | otherwise = 185 | let a = V.head v1 186 | b = V.head v2 187 | in work a v1 b v2 188 | where 189 | -- Note: we take the head and the *entirety* of each vector; NOT the head 190 | -- and the tail! 191 | work :: Chunk -> Vector Chunk -> Chunk -> Vector Chunk -> Vector Chunk 192 | work a as b bs = case a `compare` b of 193 | LT -> a `V.cons` mergeWith f (V.tail as) bs 194 | EQ -> mergeChunks a b `V.cons` mergeWith f (V.tail as) (V.tail bs) 195 | GT -> b `V.cons` mergeWith f as (V.tail bs) 196 | -------------------------------------------------------------------------------- /lib/Data/BitMap/Roaring/Chunk.hs: -------------------------------------------------------------------------------- 1 | module Data.BitMap.Roaring.Chunk where 2 | 3 | import Control.Applicative 4 | import Data.Bits 5 | import Data.Monoid 6 | import qualified Data.Vector.Unboxed as U 7 | import Data.Word 8 | 9 | import Data.BitMap.Roaring.Utility 10 | 11 | -- | A chunk representing the keys which share particular 16 high-order bits. 12 | -- 13 | -- Chunk with low density (i.e. no more than 4096 members) are represented as a 14 | -- sorted array of their low 16 bits. Chunks with high density (i.e. more than 15 | -- 4096 members) are represented by a bit vector. 16 | -- 17 | -- Both high and low density chunks include the high order bits shared by all 18 | -- entries in the chunk, and the cardinality of the chunk. 19 | data Chunk 20 | = LowDensity 21 | { chunkIndex :: Word16 22 | , chunkCardinality :: Int 23 | , chunkArray :: U.Vector Word16 24 | } 25 | | HighDensity 26 | { chunkIndex :: Word16 27 | , chunkCardinality :: Int 28 | , chunkBits :: U.Vector Word64 29 | } 30 | deriving (Eq,Show) 31 | 32 | -- | 'Chunk's are ordered by their index. 33 | instance Ord Chunk where 34 | compare c1 c2 = compare (chunkIndex c1) (chunkIndex c2) 35 | 36 | -- | Create a new chunk. 37 | chunkNew :: Word16 -> Word16 -> Chunk 38 | chunkNew i v = LowDensity i 1 (U.singleton v) 39 | 40 | -- | Extract the 'Word32's stored in a 'Chunk'. 41 | chunkToBits :: Chunk -> [Word32] 42 | chunkToBits (LowDensity i _ a) = combineWord i <$> U.toList a 43 | chunkToBits (HighDensity i _ a) = U.toList . U.concatMap f $ U.indexed a 44 | where 45 | f :: (Int, Word64) -> U.Vector Word32 46 | f (_p,_bs) = U.map (combineWord i) U.empty 47 | 48 | 49 | -- | Get a bit from a 'Chunk'. 50 | chunkGet :: Word16 -> Chunk -> Bool 51 | chunkGet v chunk = case chunk of 52 | LowDensity _ _ a -> U.elem v a 53 | HighDensity{} -> False -- TODO(thsutton) implement 54 | 55 | -- | Set a bit in a chunk. 56 | -- 57 | -- TODO(thsutton) Promote LowDensity chunk when it rises above threshold. 58 | chunkSet :: Word16 -> Chunk -> Chunk 59 | chunkSet v chunk = case chunk of 60 | LowDensity i c a -> LowDensity i c (setL v a) 61 | HighDensity i c a -> HighDensity i c (setH v a) 62 | where 63 | setL :: Word16 -> U.Vector Word16 -> U.Vector Word16 64 | setL i a = uvInsert a i 65 | setH :: Word16 -> U.Vector Word64 -> U.Vector Word64 66 | setH _ a = a -- TODO(thsutton) implement 67 | 68 | -- | Clear a bit in a 'Chunk'. 69 | -- 70 | -- TODO(thsutton) Demote HighDensity chunk when it falls below threshold. 71 | chunkClear :: Word16 -> Chunk -> Chunk 72 | chunkClear v chunk = case chunk of 73 | LowDensity i _ a -> 74 | let a' = clearL v a 75 | c' = U.length a' 76 | in LowDensity i c' a' 77 | HighDensity i _ a -> 78 | let a' = clearH v a 79 | c' = U.sum $ U.map popCount a' 80 | in HighDensity i c' a' 81 | where 82 | clearL :: Word16 -> U.Vector Word16 -> U.Vector Word16 83 | clearL i a = uvDelete a i 84 | clearH :: Word16 -> U.Vector Word64 -> U.Vector Word64 85 | clearH _ a = a -- TODO(thsutton) implement 86 | 87 | -- | Take the union of two 'Chunk's, raising an 'error' if they do not share an 88 | -- index. 89 | mergeChunks :: Chunk -> Chunk -> Chunk 90 | mergeChunks c1 c2 = 91 | if chunkIndex c1 == chunkIndex c2 92 | then merge c1 c2 93 | else error "Attempting to merge incompatible chunks!" 94 | where 95 | aPop :: U.Vector Word64 -> Int 96 | aPop = U.sum . U.map popCount 97 | aSet :: Word16 -> U.Vector Word64 -> U.Vector Word64 98 | aSet _i v = v 99 | packA :: U.Vector Word16 -> U.Vector Word64 100 | packA _ = mempty 101 | merge (HighDensity i _ a1) (HighDensity _ _ a2) = 102 | let a' = U.zipWith (.|.) a1 a2 in HighDensity i (aPop a') a' 103 | merge (HighDensity i _ ah) (LowDensity _ _ al) = 104 | let a' = U.foldr' aSet ah al in HighDensity i (aPop a') a' 105 | merge (LowDensity i _ al) (HighDensity _ _ ah) = 106 | let a' = U.foldr' aSet ah al in HighDensity i (aPop a') a' 107 | merge (LowDensity i _ a1) (LowDensity _ _ a2) = 108 | let a' = vMerge a1 a2 109 | n' = U.length a' 110 | -- TODO(thsutton): Is this eager enough? 111 | in if n' <= 4096 112 | then LowDensity i n' a' 113 | else HighDensity i n' (packA a') 114 | -------------------------------------------------------------------------------- /lib/Data/BitMap/Roaring/Utility.hs: -------------------------------------------------------------------------------- 1 | module Data.BitMap.Roaring.Utility where 2 | 3 | import Data.Bits 4 | import Data.Convertible 5 | import Data.Monoid 6 | import qualified Data.Vector as V 7 | import qualified Data.Vector.Algorithms.Heap as VAH 8 | import qualified Data.Vector.Unboxed as U 9 | import Data.Word 10 | 11 | -- * Words 12 | 13 | -- | Split a 'Word32' into its high-order and low-order bits. 14 | splitWord :: Word32 -> (Word16, Word16) 15 | splitWord w = 16 | let h = convert $ rotate (0xffff0000 .&. w) 16 17 | l = convert $ 0x0000ffff .&. w 18 | in (h,l) 19 | 20 | -- | Combine two 'Word16's of the high-order and low-order bits into a 21 | -- 'Word32'. 22 | combineWord :: Word16 -> Word16 -> Word32 23 | combineWord h l = rotate (convert h) (-16) .|. convert l 24 | 25 | -- * Vectors 26 | 27 | vMerge :: (U.Unbox e, Ord e) => U.Vector e -> U.Vector e -> U.Vector e 28 | vMerge as bs 29 | | U.null as = bs 30 | | U.null bs = as 31 | | otherwise = 32 | let a = U.head as 33 | b = U.head bs 34 | in case a `compare` b of 35 | LT -> a `U.cons` vMerge (U.tail as) bs 36 | EQ -> a `U.cons` vMerge (U.tail as) (U.tail bs) 37 | GT -> b `U.cons` vMerge as (U.tail bs) 38 | 39 | -- | Alter the 'Chunk' with the given index in a vector of 'Chunk's. 40 | -- 41 | -- The function is passed 'Nothing' if the 'Chunk' is not present. 42 | -- 43 | -- If the function returns 'Nothing' the 'Chunk', if present, is deleted; 44 | -- otherwise it is replaced. 45 | vAlter 46 | :: Ord a 47 | => (Maybe a -> Maybe a ) 48 | -> (a -> Bool) 49 | -> V.Vector a 50 | -> V.Vector a 51 | vAlter f p v = case vLookup p v of 52 | Nothing -> case f Nothing of 53 | Nothing -> v 54 | Just c' -> vInsert v c' -- TODO(thsutton) Insert 55 | Just (i, a) -> case f (Just a) of 56 | Nothing -> vDelete v i 57 | Just c' -> V.update v (V.singleton (i, c')) 58 | 59 | -- | Search for a 'Chunk' with a specific index. 60 | -- 61 | -- TODO(thsutton) better search algorithm. 62 | vLookup :: Ord a => (a -> Bool) -> V.Vector a -> Maybe (Int, a) 63 | vLookup p v = case V.findIndex p v of 64 | Nothing -> Nothing 65 | Just i -> Just (i, v V.! i) 66 | 67 | -- | Insert a 'Chunk' into a vector, replacing the 68 | -- 69 | -- Precondition: input vector is sorted. 70 | -- Postcondition: output vector is sorted. 71 | -- Postcondition: a `elem` v'. 72 | -- 73 | -- TODO(thsutton): Efficiency. 74 | vInsert :: Ord a => V.Vector a -> a -> V.Vector a 75 | vInsert v a = 76 | if a `V.elem` v 77 | then v 78 | else V.modify VAH.sort $ V.cons a v 79 | 80 | -- | Delete the element at index. 81 | -- 82 | -- Return the vector unchanged if the index is out of bounds. 83 | vDelete :: Ord a => V.Vector a -> Int -> V.Vector a 84 | vDelete v p 85 | | p < 0 = v 86 | | V.length v < p = v 87 | | otherwise = case V.splitAt p v of 88 | (s, r) -> s <> V.tail r 89 | 90 | -- * Unboxed Vectors 91 | 92 | -- | Insert a 'Chunk' into an unboxed vector. 93 | -- 94 | -- Precondition: input vector is sorted. 95 | -- Postcondition: output vector is sorted. 96 | -- Postcondition: a `elem` v'. 97 | -- 98 | -- TODO(thsutton): Efficiency. 99 | uvInsert :: (U.Unbox a, Ord a) => U.Vector a -> a -> U.Vector a 100 | uvInsert v a = 101 | if a `U.elem` v 102 | then v 103 | else U.modify VAH.sort $ U.cons a v 104 | 105 | -- | Delete an element from an unboxed vector. 106 | -- 107 | -- Precondition: input vector is sorted. 108 | -- Postcondition: output vector is sorted. 109 | -- Postcondition: not $ a `elem` v' 110 | -- 111 | -- TODO(thsutton): Efficiency. 112 | uvDelete :: (U.Unbox a, Ord a) => U.Vector a -> a -> U.Vector a 113 | uvDelete v a = case U.elemIndex a v of 114 | Nothing -> v 115 | Just p -> case U.splitAt p v of 116 | (s, r) -> s <> U.tail r 117 | -------------------------------------------------------------------------------- /rawr.cabal: -------------------------------------------------------------------------------- 1 | name: rawr 2 | version: 0.1.0.0 3 | synopsis: Roaring Bitmaps compressed bitmap data-structure. 4 | description: Roaring Bitmaps compressed bitmap data-structure. 5 | homepage: https://github.com/thsutton/rawr/ 6 | license: BSD3 7 | license-file: LICENSE 8 | author: Thomas Sutton 9 | maintainer: me@thomas-sutton.id.au 10 | -- copyright: 11 | category: Data 12 | build-type: Simple 13 | extra-source-files: README.md 14 | cabal-version: >=1.10 15 | 16 | source-repository HEAD 17 | type: git 18 | location: https://github.com/thsutton/rawr.git 19 | 20 | library 21 | default-language: Haskell2010 22 | hs-source-dirs: lib 23 | exposed-modules: 24 | Data.BitMap.Roaring 25 | Data.BitMap.Roaring.Chunk 26 | Data.BitMap.Roaring.Utility 27 | build-depends: 28 | base >=4.7 && <4.8 29 | , convertible 30 | , containers 31 | , vector 32 | , vector-algorithms 33 | 34 | test-suite properties 35 | type: exitcode-stdio-1.0 36 | default-language: Haskell2010 37 | hs-source-dirs: test 38 | main-is: properties.hs 39 | build-depends: 40 | base 41 | , QuickCheck 42 | , containers 43 | , rawr 44 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | flags: {} 2 | packages: 3 | - '.' 4 | extra-deps: [] 5 | resolver: lts-2.18 6 | -------------------------------------------------------------------------------- /test/properties.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | 3 | module Main where 4 | 5 | import Control.Monad 6 | import Data.List 7 | import Data.Monoid 8 | import qualified Data.Set as S 9 | import Data.Word 10 | import System.Exit 11 | import Test.QuickCheck 12 | 13 | import qualified Data.BitMap.Roaring as R 14 | import qualified Data.BitMap.Roaring.Utility as R 15 | 16 | -- * Check utility functions 17 | 18 | -- | id == uncurry combineWord . splitWord 19 | prop_splitWord_combineWord_id :: Word32 -> Bool 20 | prop_splitWord_combineWord_id w = 21 | w == (uncurry R.combineWord . R.splitWord $ w) 22 | 23 | -- | The empty set is null. 24 | prop_null_empty :: Bool 25 | prop_null_empty = R.null R.empty 26 | 27 | -- | Singleton sets are not null. 28 | prop_not_null_singleton :: Word32 -> Bool 29 | prop_not_null_singleton i = not . R.null $ R.singleton i 30 | 31 | -- | Larger sets are not null. 32 | prop_not_null_fromList :: NonEmptyList Word32 -> Bool 33 | prop_not_null_fromList (NonEmpty is) = not . R.null $ R.fromList is 34 | 35 | -- | Empty sets have size zero. 36 | prop_size_empty :: Bool 37 | prop_size_empty = 0 == R.size R.empty 38 | 39 | -- | Singletons have size one. 40 | prop_size_singleton :: Word32 -> Bool 41 | prop_size_singleton i = 1 == R.size (R.singleton i) 42 | 43 | -- | Singletons have size 1, then size 0 when deleted. 44 | prop_size_delete_singleton :: Word32 -> Bool 45 | prop_size_delete_singleton i = 46 | let s = R.singleton i 47 | s' = R.delete i s 48 | in R.size s == 1 && R.size s' == 0 49 | 50 | -- | Singletons are empty when the sole item is deleted. 51 | prop_null_delete_singleton :: Word32 -> Bool 52 | prop_null_delete_singleton i = 53 | R.null . R.delete i $ R.singleton i 54 | 55 | -- | 'toAscList' produces sorted lists. 56 | prop_toAscList_sorted :: NonEmptyList Word32 -> Bool 57 | prop_toAscList_sorted (NonEmpty l) = 58 | let l' = S.toAscList (S.fromList l) 59 | in l' == sort l' 60 | 61 | -- | 'toDescList' produces sorted lists. 62 | prop_toDescList_sorted :: NonEmptyList Word32 -> Bool 63 | prop_toDescList_sorted (NonEmpty l) = 64 | let l' = S.toDescList (S.fromList l) 65 | in l' == sortBy (flip compare) l' 66 | 67 | -- | "Data.IntSet" and "Data.BitMap.Roaring" agree about a set when building 68 | -- from the same list of inputs. 69 | prop_intset_roaring_agree :: NonEmptyList Word32 -> Bool 70 | prop_intset_roaring_agree (NonEmpty l) = 71 | let r = R.toAscList $ R.fromList l 72 | s = S.toAscList $ S.fromList l 73 | in r == s 74 | 75 | -- | Every item in the source list should be an element. 76 | prop_map_elem_fromList :: NonEmptyList Word32 -> Bool 77 | prop_map_elem_fromList (NonEmpty l) = 78 | let r = R.fromList l 79 | in all (`R.member` r) l 80 | 81 | -- | union s1 s2 == fromList (toList s1 <> toList s2) 82 | prop_union_fromList :: NonEmptyList Word32 -> NonEmptyList Word32 -> Bool 83 | prop_union_fromList (NonEmpty as) (NonEmpty bs) = 84 | let q = R.fromList as 85 | r = R.fromList bs 86 | qr = R.fromList (as <> bs) 87 | in (R.toAscList qr == R.toAscList (q `R.union` r)) 88 | 89 | -- 90 | -- Use Template Haskell to automatically run all of the properties above. 91 | -- 92 | 93 | return [] 94 | runTests :: IO Bool 95 | runTests = $quickCheckAll 96 | 97 | main :: IO () 98 | main = do 99 | result <- runTests 100 | unless result exitFailure 101 | --------------------------------------------------------------------------------