├── .github └── workflows │ └── haskell-ci.yml ├── .gitignore ├── .vscode └── settings.json ├── ChangeLog ├── LICENSE ├── README.md ├── Setup.hs ├── TODO ├── benchmark ├── Data │ └── LargeHashable │ │ └── Benchmarks │ │ ├── CryptoHash.hs │ │ ├── Main.hs │ │ ├── Serial.hs │ │ └── Text.hs └── Main.hs ├── cbits └── md5.h ├── default.nix ├── hie.yaml ├── large-hashable.cabal ├── large-hashable.nix ├── run-benchmarks.sh ├── run-with-profiling ├── scripts └── ci-check ├── src └── Data │ ├── LargeHashable.hs │ └── LargeHashable │ ├── Class.hs │ ├── Endianness.hs │ ├── Intern.hs │ ├── LargeWord.hs │ ├── MD5.hs │ └── TH.hs ├── stack-ghc-9.2.yaml ├── stack-ghc-9.2.yaml.lock ├── stack-ghc-9.4.yaml ├── stack-ghc-9.4.yaml.lock ├── stack-ghc-9.6.yaml ├── stack-ghc-9.6.yaml.lock ├── stack-ghc-9.8.yaml ├── stack-ghc-9.8.yaml.lock ├── stack.yaml ├── stack.yaml.lock └── test ├── Data └── LargeHashable │ └── Tests │ ├── Class.hs │ ├── Helper.hs │ ├── Inspection.hs │ ├── LargeWord.hs │ ├── MD5.hs │ ├── Stable.hs │ └── TH.hs ├── Main.hs └── bigfile.txt /.github/workflows/haskell-ci.yml: -------------------------------------------------------------------------------- 1 | name: Haskell-CI 2 | on: 3 | push: 4 | branches: [ master ] 5 | pull_request: 6 | branches: [ master ] 7 | workflow_dispatch: 8 | jobs: 9 | linux: 10 | name: Haskell-CI - Linux - ${{ matrix.compiler }} 11 | runs-on: ubuntu-latest 12 | timeout-minutes: 13 | 60 14 | container: 15 | image: skogsbaer/ubuntu:20240502224354 16 | continue-on-error: false 17 | strategy: 18 | matrix: 19 | include: 20 | - compiler: ghc-9.8 21 | - compiler: ghc-9.6 22 | - compiler: ghc-9.4 23 | - compiler: ghc-9.2 24 | fail-fast: false 25 | steps: 26 | - name: Set PATH and environment variables 27 | run: | 28 | echo "LH_TEST_STACK_YAML=$GITHUB_WORKSPACE/stack-${{ matrix.compiler }}.yaml" >> "$GITHUB_ENV" 29 | - name: checkout 30 | uses: actions/checkout@v3 31 | - name: Permissions bug workaround 32 | run: "chown -R $(id -un):$(id -gn) ~" 33 | - name: Compute cache key 34 | run: | 35 | lsb_release -a > cache-key 2> /dev/null 36 | cat large-hashable.cabal $(which stack) "$LH_TEST_STACK_YAML" scripts/ci-check >> cache-key 37 | LH_CACHE_KEY="$(md5sum cache-key | cut -c 1-32)" 38 | echo "LH_CACHE_KEY=$LH_CACHE_KEY" >> "$GITHUB_ENV" 39 | echo "LH_CACHE_KEY=$LH_CACHE_KEY" 40 | - name: Cache stack dependencies 41 | uses: actions/cache@v3 42 | with: 43 | path: | 44 | ~/.stack 45 | ~/.LH-stack-work 46 | key: ${{ runner.OS }}-stack-cache-v1-${{ matrix.compiler }}-${{ env.LH_CACHE_KEY }} 47 | restore-keys: | 48 | ${{ runner.OS }}-stack-cache-v1-${{ matrix.compiler }} 49 | - name: test 50 | run: scripts/ci-check 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | cabal-dev 3 | *.o 4 | *.hi 5 | *.chi 6 | *.chs.h 7 | *.dyn_o 8 | *.dyn_hi 9 | .virtualenv 10 | .hpc 11 | .hsenv 12 | .cabal-sandbox/ 13 | cabal.sandbox.config 14 | *.prof 15 | *.aux 16 | *.hp 17 | .HTF/ 18 | .stack-work/ 19 | dist-newstyle/ 20 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "haskell.checkProject": false, 3 | "haskell.checkParents": false, 4 | "haskell.trace.client": "info", 5 | "haskell.openDocumentationInHackage": true, 6 | "haskell.openSourceInHackage": true, 7 | "haskell.sessionLoading": "multipleComponents", 8 | "haskell.plugin.hlint.codeActionsOn": false, 9 | "haskell.plugin.hlint.diagnosticsOn": false, 10 | "haskell.plugin.semanticTokens.globalOn": true 11 | } 12 | -------------------------------------------------------------------------------- /ChangeLog: -------------------------------------------------------------------------------- 1 | 0.1.2.0: 2025-01-13 2 | * replace cryptonite (deprecated) with crypton 3 | 4 | 0.1.1.0: 2023-05-19 5 | * fix for text version 2. The fix causes hashes for text values to change. Use the new 6 | function `largeHashStable` if you want to avoid that. (`largeHashStable` comes 7 | with a performance penalty, though.) 8 | 9 | 0.1.0.4: 2017-03-19 10 | * fixed build on `i686` related to non-fixed size integers 11 | 12 | 0.1.0.2: 2016-10-10 13 | * fixed build problems because of TemplateHaskell 2.11 14 | 15 | 0.1.0.1: 2016-08-10 16 | * fixed build issue: added bitfn.h to cabal file 17 | 18 | 0.1.0.0: 2016-08-09 19 | * initial release 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 - 2016 factis research GmbH 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | * Neither the name of Author name here nor the names of other 17 | contributors may be used to endorse or promote products derived 18 | from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LargeHashable 2 | 3 | [![BuildStatus](https://github.com/factisresearch/large-hashable/actions/workflows/haskell-ci.yml/badge.svg?branch=master)](https://github.com/factisresearch/large-hashable/actions/workflows/haskell-ci.yml/badge.svg?branch=master) 4 | [![Hackage](https://img.shields.io/hackage/v/large-hashable.svg)](http://hackage.haskell.org/package/large-hashable) 5 | 6 | Efficiently hash Haskell values with MD5, SHA256, SHA512 and other 7 | hashing algorithms. 8 | 9 | ## Install 10 | 11 | * Using cabal: `cabal install large-hashable` 12 | * Using Stack: `stack install large-hashable` 13 | 14 | ### Building from git repository 15 | 16 | - clone the repository 17 | - Install the stack tool (http://haskellstack.org) 18 | - `stack build` builds the code 19 | - `stack test` builds the code and runs the tests 20 | - `run-benchmarks.sh` runs the benchmark suite 21 | -------------------------------------------------------------------------------- /Setup.hs: -------------------------------------------------------------------------------- 1 | import Distribution.Simple 2 | main = defaultMain 3 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | - Support for SHA256 and SHA512, maybe also for more hashing algorithms 2 | - Documentation 3 | -------------------------------------------------------------------------------- /benchmark/Data/LargeHashable/Benchmarks/CryptoHash.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ExistentialQuantification #-} 2 | {-# LANGUAGE MagicHash #-} 3 | module Data.LargeHashable.Benchmarks.CryptoHash where 4 | 5 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines") 6 | import Data.Bits 7 | import Data.Byteable 8 | import Data.List (foldl') 9 | import Data.Word 10 | import qualified Crypto.Hash as H 11 | import qualified Data.ByteString as BS 12 | import qualified Data.ByteString.Base16 as Base16 13 | import qualified Data.ByteString.Builder as B 14 | import qualified Data.ByteString.Char8 as BSC 15 | import qualified Data.ByteString.Lazy as BSL 16 | import qualified Data.Text as T 17 | import qualified Data.Text.Encoding as T 18 | 19 | data HashAlgorithm 20 | = MD5 21 | | SHA256 22 | | SHA512 23 | deriving (Eq, Show) 24 | 25 | data HashCtx = forall h . H.HashAlgorithm h => HashCtx !(H.Context h) 26 | 27 | newtype Hash = Hash { unHash :: BS.ByteString } 28 | deriving (Eq) 29 | 30 | instance Show Hash where 31 | show (Hash bs) = 32 | BSC.unpack (Base16.encode bs) 33 | 34 | hashMd5 :: LargeHashable h => h -> Hash 35 | hashMd5 h = 36 | let ctx = hashInit MD5 37 | in hashFinish (hashUpdate ctx h) 38 | 39 | hashInit :: HashAlgorithm -> HashCtx 40 | hashInit alg = 41 | case alg of 42 | MD5 -> HashCtx (H.hashInit :: H.Context H.MD5) 43 | SHA256 -> HashCtx (H.hashInit :: H.Context H.SHA256) 44 | SHA512 -> HashCtx (H.hashInit :: H.Context H.SHA512) 45 | 46 | hashFinish :: HashCtx -> Hash 47 | hashFinish (HashCtx x) = Hash (toBytes $ H.hashFinalize x) 48 | 49 | updateFromBuilder :: HashCtx -> B.Builder -> HashCtx 50 | updateFromBuilder (HashCtx ctx) builder = 51 | HashCtx (H.hashUpdates ctx (BSL.toChunks (B.toLazyByteString builder))) 52 | 53 | class LargeHashable a where 54 | hashUpdate :: HashCtx -> a -> HashCtx 55 | 56 | instance LargeHashable BS.ByteString where 57 | hashUpdate (HashCtx x) bs = HashCtx (H.hashUpdate x bs) 58 | 59 | instance LargeHashable Int where 60 | hashUpdate (HashCtx ctx) i = 61 | -- we can make this faster by accessing the machine represenation of an int 62 | let w = (fromIntegral (toInteger i)) :: Word64 63 | in HashCtx (H.hashUpdate ctx (BS.pack [(fromIntegral (shiftR w 56) :: Word8) 64 | ,(fromIntegral (shiftR w 48) :: Word8) 65 | ,(fromIntegral (shiftR w 40) :: Word8) 66 | ,(fromIntegral (shiftR w 32) :: Word8) 67 | ,(fromIntegral (shiftR w 24) :: Word8) 68 | ,(fromIntegral (shiftR w 16) :: Word8) 69 | ,(fromIntegral (shiftR w 8) :: Word8) 70 | ,(fromIntegral (w) :: Word8)])) 71 | 72 | instance LargeHashable T.Text where 73 | hashUpdate (HashCtx ctx) t = HashCtx (H.hashUpdate ctx (T.encodeUtf8 t)) 74 | 75 | instance LargeHashable a => LargeHashable [a] where 76 | hashUpdate ctx l = 77 | foldl' hashUpdate ctx l 78 | -------------------------------------------------------------------------------- /benchmark/Data/LargeHashable/Benchmarks/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE DeriveAnyClass #-} 3 | {-# LANGUAGE DeriveGeneric #-} 4 | {-# LANGUAGE OverloadedStrings #-} 5 | {-# LANGUAGE TemplateHaskell #-} 6 | module Data.LargeHashable.Benchmarks.Main (main) where 7 | 8 | import Control.DeepSeq 9 | import Criterion 10 | import Criterion.Main 11 | import Data.SafeCopy 12 | import Data.Serialize.Put 13 | import GHC.Generics 14 | import qualified Data.Bytes.Serial as S 15 | import qualified Data.LargeHashable as LH 16 | import qualified Data.LargeHashable.Class as LH 17 | import qualified Data.LargeHashable.Intern as LH 18 | import qualified Data.LargeHashable.Benchmarks.CryptoHash as CH 19 | import qualified Data.LargeHashable.Benchmarks.Serial as Serial 20 | import qualified Data.LargeHashable.Benchmarks.Text as TextBench 21 | import qualified Data.Text as T 22 | 23 | data Patient 24 | = Patient 25 | { p_firstName :: !T.Text 26 | , p_lastName :: !T.Text 27 | , p_note :: !T.Text 28 | , p_age :: !Int 29 | } 30 | deriving (Eq, Show, NFData, Generic) 31 | 32 | $(deriveSafeCopy 0 'base ''Patient) 33 | $(LH.deriveLargeHashable ''Patient) 34 | 35 | instance S.Serial Patient 36 | 37 | instance CH.LargeHashable Patient where 38 | hashUpdate ctx0 p = 39 | let !ctx1 = CH.hashUpdate ctx0 (p_firstName p) 40 | !ctx2 = CH.hashUpdate ctx1 (p_lastName p) 41 | !ctx3 = CH.hashUpdate ctx2 (p_note p) 42 | !ctx4 = CH.hashUpdate ctx3 (p_age p) 43 | in ctx4 44 | 45 | updateHashPatient :: Patient -> LH.LH () 46 | updateHashPatient p = 47 | {-# SCC "updateHash/LargHashable" #-} 48 | do {-# SCC "updateHash/firstName" #-} LH.updateHash (p_firstName p) 49 | {-# SCC "updateHash/lastName" #-} LH.updateHash (p_lastName p) 50 | {-# SCC "updateHash/note" #-} LH.updateHash (p_note p) 51 | {-# SCC "updateHash/age" #-} LH.updateHash (p_age p) 52 | 53 | mkPatList :: Int -> [Patient] 54 | mkPatList n = 55 | let l = map mkPat [1..n] 56 | in l `deepseq` l 57 | where 58 | mkPat i = 59 | Patient 60 | { p_firstName = "Stefan" 61 | , p_lastName = "Wehr" 62 | , p_note = "Dies ist ein bißchen mehr Text, aber auch nicht richtig lang" 63 | , p_age = i 64 | } 65 | 66 | _NUM_ :: Int 67 | _NUM_ = 100000 68 | 69 | patList :: [Patient] 70 | patList = mkPatList _NUM_ 71 | 72 | main :: IO () 73 | main = 74 | defaultMain 75 | [ env (return patList) $ \l -> 76 | bgroup "patList" 77 | [ bench "safecopy" $ whnf (CH.hashMd5 . runPut . safePut) l 78 | , bench "cryptohash" $ whnf CH.hashMd5 l 79 | , bench "large-hashable (Manual)" $ whnf (LH.runLH LH.md5HashAlgorithm . LH.updateHashList updateHashPatient) l 80 | , bench "large-hashable (TH)" $ whnf (LH.largeHash LH.md5HashAlgorithm) l 81 | , bench "large-hashable (Generic)" $ whnf (LH.runLH LH.md5HashAlgorithm . LH.genericUpdateHash) l 82 | , bench "large-hashable-serial (TH)" $ whnf (Serial.serialLargeHash LH.md5HashAlgorithm) l 83 | ] 84 | , env TextBench.setup $ \ ~(text, string) -> 85 | bgroup "text" 86 | [ bench "text-utf8" $ whnf TextBench.benchTextUtf8 text 87 | , bench "text-utf16" $ whnf TextBench.benchTextUtf16 text 88 | , bench "text-unicode" $ whnf TextBench.benchTextUnicode text 89 | , bench "string" $ whnf TextBench.benchTextString string 90 | ] 91 | ] 92 | -------------------------------------------------------------------------------- /benchmark/Data/LargeHashable/Benchmarks/Serial.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fno-warn-orphans #-} 2 | module Data.LargeHashable.Benchmarks.Serial (serialLargeHash) where 3 | 4 | import Data.LargeHashable.Intern 5 | import Data.LargeHashable.Class 6 | import Data.Bytes.Put 7 | import Data.Bytes.Serial 8 | 9 | serialLargeHash :: Serial a => HashAlgorithm h -> a -> h 10 | serialLargeHash algo a = runLH algo $ serialize a 11 | 12 | instance MonadPut LH where 13 | flush = return () 14 | 15 | putWord8 = updateHash 16 | putWord16host = updateHash 17 | putWord32host = updateHash 18 | putWord64host = updateHash 19 | putWordhost = updateHash 20 | 21 | putWord16le = updateHash 22 | putWord32le = updateHash 23 | putWord64le = updateHash 24 | 25 | putWord16be = updateHash 26 | putWord32be = updateHash 27 | putWord64be = updateHash 28 | 29 | putByteString = updateHash 30 | putLazyByteString = updateHash 31 | -------------------------------------------------------------------------------- /benchmark/Data/LargeHashable/Benchmarks/Text.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE CPP #-} 3 | {-# LANGUAGE ScopedTypeVariables #-} 4 | module Data.LargeHashable.Benchmarks.Text ( 5 | setup, 6 | benchTextUtf8, 7 | benchTextUtf16, 8 | benchTextUnicode, 9 | benchTextString 10 | ) where 11 | 12 | import Data.Char (ord) 13 | import qualified Data.Text as T 14 | import qualified Data.Text.Encoding as TE 15 | import qualified Data.Text.Foreign as TF 16 | import qualified Data.Text.IO as T 17 | import qualified Data.ByteString as BS 18 | import Data.LargeHashable 19 | import Data.LargeHashable.Intern 20 | import Data.Word 21 | import GHC.Ptr 22 | 23 | setup :: IO (T.Text, String) 24 | setup = do 25 | #if MIN_VERSION_text(2,0,0) 26 | text <- T.readFile "test/bigfile.txt" 27 | let string = T.unpack text 28 | return (length string `seq` (text, string)) 29 | #else 30 | fail "benchmark for text only available with text >= 2" 31 | #endif 32 | 33 | -- Hash the native UTF8 encoding 34 | benchTextUtf8 :: T.Text -> MD5Hash 35 | benchTextUtf8 !t = runLH md5HashAlgorithm go 36 | where 37 | go = do 38 | updates <- hashUpdates 39 | ioInLH $ do 40 | TF.useAsPtr t $ \(valPtr :: Ptr Word8) (units :: TF.I8) -> 41 | hu_updatePtr updates (castPtr valPtr) (fromIntegral units) 42 | hu_updateULong updates (fromIntegral (T.length t)) 43 | 44 | -- Hash the UTF16 encoding for backwards compat with text-1.* 45 | benchTextUtf16 :: T.Text -> MD5Hash 46 | benchTextUtf16 !t = 47 | let b = TE.encodeUtf16BE t -- encoding of text-1.* (at least on mac with big endian) 48 | in runLH md5HashAlgorithm (go b) 49 | where 50 | go b = do 51 | updates <- hashUpdates 52 | ioInLH $ do 53 | ptr <- BS.useAsCString b return 54 | hu_updatePtr updates (castPtr ptr) (BS.length b) 55 | hu_updateULong updates (fromIntegral (T.length t)) 56 | 57 | benchTextUnicode :: T.Text -> MD5Hash 58 | benchTextUnicode !t = runLH md5HashAlgorithm go 59 | where 60 | go = do 61 | updates <- hashUpdates 62 | ioInLH $ do 63 | hashStringNoLength updates (T.unpack t) 64 | hu_updateULong updates (fromIntegral (T.length t)) 65 | 66 | hashStringNoLength :: HashUpdates -> String -> IO () 67 | hashStringNoLength updates = loop 68 | where 69 | loop :: [Char] -> IO () 70 | loop [] = pure () 71 | loop (c:cs) = do 72 | _ <- hu_updateUInt updates (c2w c) 73 | loop cs 74 | 75 | benchTextString:: String -> MD5Hash 76 | benchTextString s = runLH md5HashAlgorithm $ do 77 | updates <- hashUpdates 78 | ioInLH $ hashString updates s 79 | 80 | hashString :: HashUpdates -> String -> IO () 81 | hashString updates = loop 0 82 | where 83 | loop :: Int -> [Char] -> IO () 84 | loop !n [] = hu_updateULong updates (fromIntegral n) 85 | loop !n (c:cs) = do 86 | _ <- hu_updateUInt updates (c2w c) 87 | loop (n+1) cs 88 | 89 | {-# INLINE c2w #-} 90 | c2w :: Char -> Word32 91 | c2w c = toEnum (ord c :: Int) 92 | -------------------------------------------------------------------------------- /benchmark/Main.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import qualified Data.LargeHashable.Benchmarks.Main as M 4 | 5 | main :: IO () 6 | main = M.main 7 | -------------------------------------------------------------------------------- /cbits/md5.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2006-2009 Vincent Hanquez 3 | * 2016 Herbert Valerio Riedel 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions 7 | * are met: 8 | * 1. Redistributions of source code must retain the above copyright 9 | * notice, this list of conditions and the following disclaimer. 10 | * 2. Redistributions in binary form must reproduce the above copyright 11 | * notice, this list of conditions and the following disclaimer in the 12 | * documentation and/or other materials provided with the distribution. 13 | * 14 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | */ 25 | 26 | #ifndef MD5_H 27 | #define MD5_H 28 | 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | struct md5_ctx 36 | { 37 | uint64_t sz; 38 | uint8_t buf[64]; 39 | uint32_t h[4]; 40 | }; 41 | 42 | #define MD5_DIGEST_SIZE 16 43 | #define MD5_CTX_SIZE 88 44 | 45 | static inline void md5_init(struct md5_ctx *ctx); 46 | static inline void md5_update(struct md5_ctx *ctx, const uint8_t *data, size_t len); 47 | static inline void md5_update_uchar(struct md5_ctx *ctx, uint8_t data); 48 | static inline void md5_update_ushort(struct md5_ctx *ctx, uint16_t data); 49 | static inline void md5_update_uint(struct md5_ctx *ctx, uint32_t data); 50 | static inline void md5_update_ulong(struct md5_ctx *ctx, uint64_t data); 51 | static inline uint64_t md5_finalize(struct md5_ctx *ctx, uint8_t *out); 52 | 53 | #if defined(static_assert) 54 | static_assert(sizeof(struct md5_ctx) == MD5_CTX_SIZE, "unexpected md5_ctx size"); 55 | #else 56 | /* poor man's pre-C11 _Static_assert */ 57 | typedef char static_assertion__unexpected_md5_ctx_size[(sizeof(struct md5_ctx) == MD5_CTX_SIZE)?1:-1]; 58 | #endif 59 | 60 | #define ptr_uint32_aligned(ptr) (!((uintptr_t)(ptr) & 0x3)) 61 | 62 | static inline uint32_t 63 | rol32(const uint32_t word, const unsigned shift) 64 | { 65 | /* GCC usually transforms this into a 'rol'-insn */ 66 | return (word << shift) | (word >> (32 - shift)); 67 | } 68 | 69 | static inline uint32_t 70 | cpu_to_le32(const uint32_t hl) 71 | { 72 | #if !WORDS_BIGENDIAN 73 | return hl; 74 | #elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) 75 | return __builtin_bswap32(hl); 76 | #else 77 | /* GCC usually transforms this into a bswap insn */ 78 | return ((hl & 0xff000000) >> 24) | 79 | ((hl & 0x00ff0000) >> 8) | 80 | ((hl & 0x0000ff00) << 8) | 81 | ( hl << 24); 82 | #endif 83 | } 84 | 85 | static inline void 86 | cpu_to_le32_array(uint32_t *dest, const uint32_t *src, unsigned wordcnt) 87 | { 88 | while (wordcnt--) 89 | *dest++ = cpu_to_le32(*src++); 90 | } 91 | 92 | static inline uint64_t 93 | cpu_to_le64(const uint64_t hll) 94 | { 95 | #if !WORDS_BIGENDIAN 96 | return hll; 97 | #elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) 98 | return __builtin_bswap64(hll); 99 | #else 100 | return ((uint64_t)cpu_to_le32(hll & 0xffffffff) << 32LL) | cpu_to_le32(hll >> 32); 101 | #endif 102 | } 103 | 104 | 105 | static inline void 106 | md5_init(struct md5_ctx *ctx) 107 | { 108 | memset(ctx, 0, sizeof(*ctx)); 109 | 110 | ctx->h[0] = 0x67452301; 111 | ctx->h[1] = 0xefcdab89; 112 | ctx->h[2] = 0x98badcfe; 113 | ctx->h[3] = 0x10325476; 114 | } 115 | 116 | #define f1(x, y, z) (z ^ (x & (y ^ z))) 117 | #define f2(x, y, z) f1(z, x, y) 118 | #define f3(x, y, z) (x ^ y ^ z) 119 | #define f4(x, y, z) (y ^ (x | ~z)) 120 | #define R(f, a, b, c, d, i, k, s) a += f(b, c, d) + w[i] + k; a = rol32(a, s); a += b 121 | 122 | static void 123 | md5_do_chunk_aligned(struct md5_ctx *ctx, const uint32_t w[]) 124 | { 125 | uint32_t a = ctx->h[0]; 126 | uint32_t b = ctx->h[1]; 127 | uint32_t c = ctx->h[2]; 128 | uint32_t d = ctx->h[3]; 129 | 130 | R(f1, a, b, c, d, 0, 0xd76aa478, 7); 131 | R(f1, d, a, b, c, 1, 0xe8c7b756, 12); 132 | R(f1, c, d, a, b, 2, 0x242070db, 17); 133 | R(f1, b, c, d, a, 3, 0xc1bdceee, 22); 134 | R(f1, a, b, c, d, 4, 0xf57c0faf, 7); 135 | R(f1, d, a, b, c, 5, 0x4787c62a, 12); 136 | R(f1, c, d, a, b, 6, 0xa8304613, 17); 137 | R(f1, b, c, d, a, 7, 0xfd469501, 22); 138 | R(f1, a, b, c, d, 8, 0x698098d8, 7); 139 | R(f1, d, a, b, c, 9, 0x8b44f7af, 12); 140 | R(f1, c, d, a, b, 10, 0xffff5bb1, 17); 141 | R(f1, b, c, d, a, 11, 0x895cd7be, 22); 142 | R(f1, a, b, c, d, 12, 0x6b901122, 7); 143 | R(f1, d, a, b, c, 13, 0xfd987193, 12); 144 | R(f1, c, d, a, b, 14, 0xa679438e, 17); 145 | R(f1, b, c, d, a, 15, 0x49b40821, 22); 146 | 147 | R(f2, a, b, c, d, 1, 0xf61e2562, 5); 148 | R(f2, d, a, b, c, 6, 0xc040b340, 9); 149 | R(f2, c, d, a, b, 11, 0x265e5a51, 14); 150 | R(f2, b, c, d, a, 0, 0xe9b6c7aa, 20); 151 | R(f2, a, b, c, d, 5, 0xd62f105d, 5); 152 | R(f2, d, a, b, c, 10, 0x02441453, 9); 153 | R(f2, c, d, a, b, 15, 0xd8a1e681, 14); 154 | R(f2, b, c, d, a, 4, 0xe7d3fbc8, 20); 155 | R(f2, a, b, c, d, 9, 0x21e1cde6, 5); 156 | R(f2, d, a, b, c, 14, 0xc33707d6, 9); 157 | R(f2, c, d, a, b, 3, 0xf4d50d87, 14); 158 | R(f2, b, c, d, a, 8, 0x455a14ed, 20); 159 | R(f2, a, b, c, d, 13, 0xa9e3e905, 5); 160 | R(f2, d, a, b, c, 2, 0xfcefa3f8, 9); 161 | R(f2, c, d, a, b, 7, 0x676f02d9, 14); 162 | R(f2, b, c, d, a, 12, 0x8d2a4c8a, 20); 163 | 164 | R(f3, a, b, c, d, 5, 0xfffa3942, 4); 165 | R(f3, d, a, b, c, 8, 0x8771f681, 11); 166 | R(f3, c, d, a, b, 11, 0x6d9d6122, 16); 167 | R(f3, b, c, d, a, 14, 0xfde5380c, 23); 168 | R(f3, a, b, c, d, 1, 0xa4beea44, 4); 169 | R(f3, d, a, b, c, 4, 0x4bdecfa9, 11); 170 | R(f3, c, d, a, b, 7, 0xf6bb4b60, 16); 171 | R(f3, b, c, d, a, 10, 0xbebfbc70, 23); 172 | R(f3, a, b, c, d, 13, 0x289b7ec6, 4); 173 | R(f3, d, a, b, c, 0, 0xeaa127fa, 11); 174 | R(f3, c, d, a, b, 3, 0xd4ef3085, 16); 175 | R(f3, b, c, d, a, 6, 0x04881d05, 23); 176 | R(f3, a, b, c, d, 9, 0xd9d4d039, 4); 177 | R(f3, d, a, b, c, 12, 0xe6db99e5, 11); 178 | R(f3, c, d, a, b, 15, 0x1fa27cf8, 16); 179 | R(f3, b, c, d, a, 2, 0xc4ac5665, 23); 180 | 181 | R(f4, a, b, c, d, 0, 0xf4292244, 6); 182 | R(f4, d, a, b, c, 7, 0x432aff97, 10); 183 | R(f4, c, d, a, b, 14, 0xab9423a7, 15); 184 | R(f4, b, c, d, a, 5, 0xfc93a039, 21); 185 | R(f4, a, b, c, d, 12, 0x655b59c3, 6); 186 | R(f4, d, a, b, c, 3, 0x8f0ccc92, 10); 187 | R(f4, c, d, a, b, 10, 0xffeff47d, 15); 188 | R(f4, b, c, d, a, 1, 0x85845dd1, 21); 189 | R(f4, a, b, c, d, 8, 0x6fa87e4f, 6); 190 | R(f4, d, a, b, c, 15, 0xfe2ce6e0, 10); 191 | R(f4, c, d, a, b, 6, 0xa3014314, 15); 192 | R(f4, b, c, d, a, 13, 0x4e0811a1, 21); 193 | R(f4, a, b, c, d, 4, 0xf7537e82, 6); 194 | R(f4, d, a, b, c, 11, 0xbd3af235, 10); 195 | R(f4, c, d, a, b, 2, 0x2ad7d2bb, 15); 196 | R(f4, b, c, d, a, 9, 0xeb86d391, 21); 197 | 198 | ctx->h[0] += a; 199 | ctx->h[1] += b; 200 | ctx->h[2] += c; 201 | ctx->h[3] += d; 202 | } 203 | 204 | static void 205 | md5_do_chunk(struct md5_ctx *ctx, const uint8_t buf[]) 206 | { 207 | if (ptr_uint32_aligned(buf)) { /* aligned buf */ 208 | #if WORDS_BIGENDIAN 209 | uint32_t w[16]; cpu_to_le32_array(w, (const uint32_t *)buf, 16); 210 | #else 211 | const uint32_t *w = (const uint32_t *)buf; 212 | #endif 213 | md5_do_chunk_aligned(ctx, w); 214 | } else { /* unaligned buf */ 215 | uint32_t w[16]; memcpy(w, buf, 64); 216 | #if WORDS_BIGENDIAN 217 | cpu_to_le32_array(w, w, 16); 218 | #endif 219 | md5_do_chunk_aligned(ctx, w); 220 | } 221 | } 222 | 223 | static inline void 224 | md5_update(struct md5_ctx *ctx, const uint8_t *data, size_t len) 225 | { 226 | size_t index = ctx->sz & 0x3f; 227 | const size_t to_fill = 64 - index; 228 | 229 | ctx->sz += len; 230 | 231 | /* process partial buffer if there's enough data to make a block */ 232 | if (index && len >= to_fill) { 233 | memcpy(ctx->buf + index, data, to_fill); 234 | md5_do_chunk(ctx, ctx->buf); 235 | /* memset(ctx->buf, 0, 64); */ 236 | len -= to_fill; 237 | data += to_fill; 238 | index = 0; 239 | } 240 | 241 | /* process as many 64-blocks as possible */ 242 | while (len >= 64) { 243 | md5_do_chunk(ctx, data); 244 | len -= 64; 245 | data += 64; 246 | } 247 | 248 | /* append data into buf */ 249 | if (len) 250 | memcpy(ctx->buf + index, data, len); 251 | } 252 | 253 | static inline void md5_update_uchar(struct md5_ctx *ctx, uint8_t data) 254 | { 255 | md5_update(ctx, &data, sizeof(data)); 256 | } 257 | 258 | static inline void md5_update_ushort(struct md5_ctx *ctx, uint16_t data) 259 | { 260 | md5_update(ctx, (uint8_t *)&data, sizeof(data)); 261 | } 262 | 263 | static inline void md5_update_uint(struct md5_ctx *ctx, uint32_t data) 264 | { 265 | md5_update(ctx, (uint8_t *)&data, sizeof(data)); 266 | } 267 | 268 | static inline void md5_update_ulong(struct md5_ctx *ctx, uint64_t data) 269 | { 270 | md5_update(ctx, (uint8_t *)&data, sizeof(data)); 271 | } 272 | 273 | static inline uint64_t 274 | md5_finalize(struct md5_ctx *ctx, uint8_t *out) 275 | { 276 | static const uint8_t padding[64] = { 0x80, }; 277 | const uint64_t sz = ctx->sz; 278 | 279 | /* add padding and update data with it */ 280 | const uint64_t bits = cpu_to_le64(ctx->sz << 3); 281 | 282 | /* pad out to 56 */ 283 | const size_t index = (ctx->sz & 0x3f); 284 | const size_t padlen = (index < 56) ? (56 - index) : ((64 + 56) - index); 285 | md5_update(ctx, padding, padlen); 286 | 287 | /* append length */ 288 | md5_update(ctx, (const uint8_t *) &bits, sizeof(bits)); 289 | 290 | /* output hash */ 291 | cpu_to_le32_array((uint32_t *) out, ctx->h, 4); 292 | 293 | return sz; 294 | } 295 | 296 | #endif 297 | -------------------------------------------------------------------------------- /default.nix: -------------------------------------------------------------------------------- 1 | let pkgs = import { }; 2 | in pkgs.haskellPackages.callPackage ./large-hashable.nix { } 3 | -------------------------------------------------------------------------------- /hie.yaml: -------------------------------------------------------------------------------- 1 | cradle: 2 | stack: 3 | - path: "./src" 4 | component: "large-hashable:lib" 5 | 6 | - path: "./benchmark" 7 | component: "large-hashable:bench:large-hashable-benchmark" 8 | 9 | - path: "./test" 10 | component: "large-hashable:test:large-hashable-test" 11 | -------------------------------------------------------------------------------- /large-hashable.cabal: -------------------------------------------------------------------------------- 1 | name: large-hashable 2 | version: 0.1.2.0 3 | synopsis: Efficiently hash (large) Haskell values 4 | description: Please see README.md 5 | homepage: https://github.com/factisresearch/large-hashable 6 | license: BSD3 7 | license-file: LICENSE 8 | author: Stefan Wehr, Lukas Epple 9 | maintainer: Stefan Wehr 10 | copyright: 2015 - 2017 factis research GmbH, 2018 - 2023 medilyse GmbH, 2024 - 2025 MEQO GmbH 11 | category: Web 12 | build-type: Simple 13 | cabal-version: 2.0 14 | extra-source-files: 15 | cbits/md5.h 16 | README.md 17 | ChangeLog 18 | default.nix 19 | stack.yaml 20 | test/bigfile.txt 21 | stack-ghc-9.2.yaml 22 | stack-ghc-9.4.yaml 23 | stack-ghc-9.6.yaml 24 | stack-ghc-9.8.yaml 25 | 26 | library 27 | hs-source-dirs: src 28 | include-dirs: cbits 29 | exposed-modules: Data.LargeHashable 30 | , Data.LargeHashable.Class 31 | , Data.LargeHashable.MD5 32 | , Data.LargeHashable.Intern 33 | , Data.LargeHashable.LargeWord 34 | , Data.LargeHashable.TH 35 | other-modules: Data.LargeHashable.Endianness 36 | build-depends: aeson 37 | , base >= 4.8 && < 5 38 | , base16-bytestring 39 | , bytes 40 | , bytestring 41 | , containers 42 | , crypton 43 | , memory 44 | , scientific 45 | , strict 46 | , template-haskell 47 | , text 48 | , time 49 | , transformers 50 | , unordered-containers 51 | , vector 52 | , void 53 | build-tool-depends: cpphs:cpphs 54 | default-language: Haskell2010 55 | ghc-options: -optc -O3 -W -fwarn-unused-imports -fwarn-unused-binds 56 | -fwarn-unused-matches -fwarn-unused-do-bind -fwarn-wrong-do-bind 57 | -fno-warn-name-shadowing 58 | -fwarn-missing-signatures -O2 59 | 60 | benchmark large-hashable-benchmark 61 | type: exitcode-stdio-1.0 62 | hs-source-dirs: benchmark 63 | main-is: Main.hs 64 | other-modules: Data.LargeHashable.Benchmarks.CryptoHash 65 | , Data.LargeHashable.Benchmarks.Main 66 | , Data.LargeHashable.Benchmarks.Serial 67 | , Data.LargeHashable.Benchmarks.Text 68 | ghc-options: -optc -O3 -threaded -rtsopts -with-rtsopts=-N 69 | -W -fwarn-unused-imports -fwarn-unused-binds 70 | -fwarn-unused-matches -fwarn-unused-do-bind -fwarn-wrong-do-bind 71 | -pgmP "cpphs --cpp" -rtsopts -threaded -funbox-strict-fields 72 | -fwarn-missing-signatures -fno-warn-name-shadowing -O2 73 | build-depends: base >= 4.8 && < 5 74 | , base16-bytestring 75 | , criterion 76 | , large-hashable 77 | , safecopy 78 | , text 79 | , deepseq 80 | , cryptohash 81 | , bytestring 82 | , cereal 83 | , byteable 84 | , transformers 85 | , bytes 86 | build-tool-depends: cpphs:cpphs 87 | default-language: Haskell2010 88 | 89 | test-suite large-hashable-test 90 | type: exitcode-stdio-1.0 91 | hs-source-dirs: test 92 | main-is: Main.hs 93 | build-depends: aeson 94 | , HTF 95 | , QuickCheck 96 | , base >= 4.8 && < 5 97 | , bytes 98 | , bytestring 99 | , containers 100 | , hashable 101 | , large-hashable 102 | , scientific 103 | , strict 104 | , text 105 | , time 106 | , unordered-containers 107 | , vector 108 | , inspection-testing 109 | ghc-options: -optc -O3 -threaded -rtsopts -with-rtsopts=-N 110 | -W -fwarn-unused-imports -fwarn-unused-binds 111 | -fwarn-unused-matches -fwarn-unused-do-bind -fwarn-wrong-do-bind 112 | -pgmP "cpphs --cpp" -rtsopts -threaded -funbox-strict-fields 113 | -fwarn-missing-signatures -fno-warn-name-shadowing 114 | build-tool-depends: cpphs:cpphs 115 | default-language: Haskell2010 116 | other-modules: Data.LargeHashable.Tests.Class 117 | , Data.LargeHashable.Tests.Helper 118 | , Data.LargeHashable.Tests.Inspection 119 | , Data.LargeHashable.Tests.TH 120 | , Data.LargeHashable.Tests.LargeWord 121 | , Data.LargeHashable.Tests.MD5 122 | , Data.LargeHashable.Tests.Stable 123 | 124 | source-repository head 125 | type: git 126 | location: https://github.com/factisresearch/large-hashable 127 | -------------------------------------------------------------------------------- /large-hashable.nix: -------------------------------------------------------------------------------- 1 | { mkDerivation, aeson, base, base16-bytestring, bytes, bytestring 2 | , containers, hashable, HTF, QuickCheck, scientific, stdenv, strict 3 | , template-haskell, text, time, transformers, unordered-containers 4 | , utf8-light, vector, void 5 | }: 6 | mkDerivation { 7 | pname = "large-hashable"; 8 | version = "0.1.0.3"; 9 | src = ./.; 10 | libraryHaskellDepends = [ 11 | aeson base base16-bytestring bytes bytestring containers scientific 12 | strict template-haskell text time transformers unordered-containers 13 | utf8-light vector void 14 | ]; 15 | testHaskellDepends = [ 16 | aeson base bytes bytestring containers hashable HTF QuickCheck 17 | scientific strict text time unordered-containers vector 18 | ]; 19 | homepage = "https://github.com/factisresearch/large-hashable"; 20 | description = "Efficiently hash (large) Haskell values"; 21 | license = stdenv.lib.licenses.bsd3; 22 | } 23 | -------------------------------------------------------------------------------- /run-benchmarks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd "$(dirname "$0")" 4 | 5 | if type stack >/dev/null 2>&1 6 | then 7 | echo "Compiling ..." 8 | stack build || exit 1 9 | echo "Done compiling" 10 | exe=$(stack exec -- which large-hashable-benchmark) 11 | else 12 | exe=./dist/build/large-hashable-benchmark/large-hashable-benchmark 13 | echo "Not compiling, using executable $exe" 14 | fi 15 | 16 | function run() 17 | { 18 | echo 19 | echo "Running benchmark $1" 20 | $exe "$1" +RTS -s 2>&1 | egrep 'total memory in use|Total time|Productivity|bytes allocated in the heap' 21 | if ! test ${PIPESTATUS[0]} -eq 0; then 22 | echo "Benchmark $1 failed!" 23 | exit 1 24 | fi 25 | } 26 | 27 | run dry 28 | run safecopy 29 | run cryptohash 30 | run large-hashable-serial 31 | run large-hashable 32 | -------------------------------------------------------------------------------- /run-with-profiling: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "$1" -o "$1" == "--help" ]; then 4 | echo "USAGE: $0 [--heap] BENCHMARK_NAME" 5 | exit 1 6 | fi 7 | 8 | prof=time 9 | if [ "$1" == "--heap" ]; then 10 | prof=heap 11 | shift 12 | fi 13 | 14 | stack build --executable-profiling --library-profiling --ghc-options=-fprof-auto 15 | 16 | exe=$(stack exec -- which large-hashable-benchmark) 17 | 18 | if [ "$prof" == "time" ]; then 19 | $exe $1 +RTS -p 20 | else 21 | $exe $1 +RTS -hd 22 | fi 23 | -------------------------------------------------------------------------------- /scripts/ci-check: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ -z "$LH_TEST_STACK_YAML" ]; then 4 | echo > /dev/stderr "Environment variable LH_TEST_STACK_YAML not set" 5 | exit 1 6 | fi 7 | 8 | STACK_WORK=$HOME/.LH-stack-work 9 | export LH_TEST_STACK_ARGS="--stack-yaml $LH_TEST_STACK_YAML" 10 | export LANG=C.UTF-8 11 | export LC_ALL=C.UTF-8 12 | echo "Environment variables:" 13 | env 14 | 15 | STACK_OPTS="$LH_TEST_STACK_ARGS --allow-different-user" 16 | 17 | echo -n "Working directory: " 18 | pwd 19 | 20 | echo -n "Cached ~/.stack directory: " 21 | ls -l ~/.stack 2> /dev/stdout 22 | 23 | echo -n "Cached $STACK_WORK directory: " 24 | ls -l $STACK_WORK 2> /dev/stdout 25 | 26 | echo -n "Local .stack-work directory: " 27 | ls -l .stack-work 2> /dev/stdout 28 | 29 | if [ -d $STACK_WORK -a ! -d .stack-work ]; then 30 | echo "Restoring .stack-work from $STACK_WORK ..." 31 | cp -r $STACK_WORK .stack-work 32 | echo "Done restoring .stack-work from $STACK_WORK" 33 | fi 34 | 35 | echo "Running stack test --only-dependencies ..." 36 | time stack $STACK_OPTS test --only-dependencies || exit 1 37 | echo "Done running stack test --only-dependencies" 38 | 39 | echo "Saving .stack-work to $STACK_WORK ..." 40 | rm -rf $STACK_WORK 41 | cp -r .stack-work $STACK_WORK 42 | echo "Done saving .stack-work to $STACK_WORK" 43 | 44 | echo "Running stack test ..." 45 | stack $STACK_OPTS test --haddock || exit 1 46 | echo "Done running stack test" 47 | -------------------------------------------------------------------------------- /src/Data/LargeHashable.hs: -------------------------------------------------------------------------------- 1 | -- | This is the top-level module of LargeHashable, a library 2 | -- for efficiently hashing any Haskell data type using a 3 | -- hash algorithm like MD5, SHA256 etc. 4 | -- 5 | -- Normal users should import this module. 6 | module Data.LargeHashable ( 7 | LargeHashable(..) 8 | , LargeHashable'(..) 9 | , LH 10 | , HashAlgorithm 11 | , largeHash 12 | , largeHashStable 13 | , deriveLargeHashable 14 | , deriveLargeHashableNoCtx 15 | , deriveLargeHashableCtx 16 | , deriveLargeHashableCustomCtx 17 | , MD5Hash(..) 18 | , md5HashAlgorithm 19 | , runMD5 20 | , module Data.LargeHashable.LargeWord 21 | ) where 22 | 23 | import Data.LargeHashable.Class 24 | import Data.LargeHashable.Intern 25 | import Data.LargeHashable.LargeWord 26 | import Data.LargeHashable.MD5 27 | import Data.LargeHashable.TH 28 | -------------------------------------------------------------------------------- /src/Data/LargeHashable/Class.hs: -------------------------------------------------------------------------------- 1 | -- | This module defines the central type class `LargeHashable` of this package. 2 | {-# LANGUAGE BangPatterns #-} 3 | {-# LANGUAGE CPP #-} 4 | {-# LANGUAGE DataKinds #-} 5 | {-# LANGUAGE DefaultSignatures #-} 6 | {-# LANGUAGE FlexibleContexts #-} 7 | {-# LANGUAGE FlexibleInstances #-} 8 | {-# LANGUAGE KindSignatures #-} 9 | {-# LANGUAGE MultiParamTypeClasses #-} 10 | {-# LANGUAGE RankNTypes #-} 11 | {-# LANGUAGE ScopedTypeVariables #-} 12 | {-# LANGUAGE TypeOperators #-} 13 | module Data.LargeHashable.Class ( 14 | 15 | LargeHashable(..), largeHash, largeHashStable, LargeHashable'(..), genericUpdateHash, 16 | updateHashList 17 | ) where 18 | 19 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines") 20 | import Data.Bits 21 | import Data.Char (ord) 22 | import Data.Fixed 23 | import Data.Foldable 24 | import Data.Int 25 | import Data.LargeHashable.Endianness 26 | import Data.LargeHashable.Intern 27 | import Data.Ratio 28 | import Data.Time 29 | import Data.Time.Clock.TAI 30 | import Data.Void (Void) 31 | import Data.Word 32 | import Foreign.C.Types 33 | import Foreign.Ptr 34 | import GHC.Generics 35 | import qualified Data.Aeson as J 36 | #if MIN_VERSION_aeson(2,0,0) 37 | import qualified Data.Aeson.Key as AesonKey 38 | import qualified Data.Aeson.KeyMap as AesonKeyMap 39 | #endif 40 | import qualified Data.ByteString as B 41 | import qualified Data.ByteString.Lazy as BL 42 | import qualified Data.ByteString.Lazy.Internal as BLI 43 | import qualified Data.ByteString.Short as BS 44 | import qualified Data.Foldable as F 45 | import qualified Data.HashMap.Lazy as HashMap 46 | import qualified Data.HashSet as HashSet 47 | import qualified Data.IntMap as IntMap 48 | import qualified Data.IntSet as IntSet 49 | import qualified Data.Map as M 50 | import qualified Data.Scientific as Sci 51 | import qualified Data.Sequence as Seq 52 | import qualified Data.Set as S 53 | import qualified Data.Strict.Tuple as Tuple 54 | import qualified Data.Text as T 55 | import qualified Data.Text.Encoding as TE 56 | import qualified Data.Text.Foreign as TF 57 | import qualified Data.Text.Internal.Lazy as TLI 58 | import qualified Data.Text.Lazy as TL 59 | import qualified Data.Vector as V 60 | import Data.Kind (Type) 61 | 62 | -- | A type class for computing hashes (i.e. MD5, SHA256, ...) from 63 | -- haskell values. 64 | -- 65 | -- The laws of this typeclass are the following: 66 | -- 67 | -- (1) If two values are equal 68 | -- according to '==', then the finally computed hashes must also be equal 69 | -- according to '=='. However it is not required that the hashes of inequal 70 | -- values have to be inequal. Also note that an instance of 'LargeHashable' 71 | -- does not require a instance of 'Eq'. Using any sane algorithm the chance 72 | -- of a collision should be 1 / n where n is the number of different hashes 73 | -- possible. 74 | -- 75 | -- (2) If two values are inequal 76 | -- according to '==', then the probability of a hash collision is 1/n, 77 | -- where n is the number of possible hashes produced by the 78 | -- underlying hash algorithm. 79 | -- 80 | -- A rule of thumb: hash all information that you would also need for 81 | -- serializing/deserializing values of your datatype. For instance, when 82 | -- hashing lists, you would not only hash the list elements but also the 83 | -- length of the list. Consider the following datatype 84 | -- 85 | -- > data Foo = Foo [Int] [Int] 86 | -- 87 | -- We now write an instance for LargeHashable like this 88 | -- 89 | -- > instance LargeHashable Foo where 90 | -- > updateHash (Foo l1 l2) = updateHash l1 >> updateHash l2 91 | -- 92 | -- If we did not hash the length of a list, then the following two values 93 | -- of @Foo@ would produce identical hashes: 94 | -- 95 | -- > Foo [1,2,3] [] 96 | -- > Foo [1] [2,3] 97 | -- 98 | class LargeHashable a where 99 | updateHash :: a -> LH () 100 | default updateHash :: (GenericLargeHashable (Rep a), Generic a) => a -> LH () 101 | updateHash = genericUpdateHash 102 | updateHashStable :: a -> LH () 103 | default updateHashStable :: (GenericLargeHashable (Rep a), Generic a) => a -> LH () 104 | updateHashStable = genericUpdateHashStable 105 | 106 | class LargeHashable' t where 107 | updateHash' :: LargeHashable a => t a -> LH () 108 | updateHashStable' :: LargeHashable a => t a -> LH () 109 | 110 | -- | 'largeHash' is the central function of this package. 111 | -- For a given value it computes a 'Hash' using the given 112 | -- 'HashAlgorithm'. The library tries to keep the 113 | -- hash values for @LargeHashable@ instances provided by 114 | -- library stable across releases, but there is no guarantee. 115 | -- See @largeHashStable& 116 | largeHash :: LargeHashable a => HashAlgorithm h -> a -> h 117 | largeHash algo x = runLH algo (updateHash x) 118 | 119 | -- | 'largeHashStable' is similar to @largeHash@, but the hash 120 | -- value is guaranteed to remain stable across releases, 121 | -- even if this causes performance to degrade. 122 | largeHashStable :: LargeHashable a => HashAlgorithm h -> a -> h 123 | largeHashStable algo x = runLH algo (updateHashStable x) 124 | 125 | {-# INLINE updateHashTextData #-} 126 | updateHashTextData :: T.Text -> LH () 127 | updateHashTextData !t = do 128 | updates <- hashUpdates 129 | ioInLH $ do 130 | #if MIN_VERSION_text(2,0,0) 131 | TF.useAsPtr t $ \(valPtr :: Ptr Word8) (units :: TF.I8) -> 132 | hu_updatePtr updates (castPtr valPtr) (fromIntegral units) 133 | #else 134 | -- UTF-16 encoding 135 | TF.useAsPtr t $ \(valPtr :: Ptr Word16) (units :: TF.I16) -> 136 | hu_updatePtr updates (castPtr valPtr) (fromIntegral (2 * units)) 137 | #endif 138 | return () 139 | 140 | {-# INLINE updateHashText #-} 141 | updateHashText :: T.Text -> LH () 142 | updateHashText !t = do 143 | updateHashTextData t 144 | updates <- hashUpdates 145 | ioInLH $ hu_updateULong updates (fromIntegral (T.length t)) 146 | 147 | {-# INLINE updateHashStableTextData #-} 148 | updateHashStableTextData :: T.Text -> LH () 149 | updateHashStableTextData t = do 150 | let bs = 151 | case systemEndianness of 152 | LittleEndian -> TE.encodeUtf16LE t 153 | BigEndian -> TE.encodeUtf16BE t 154 | updateHashByteStringData bs 155 | 156 | {-# INLINE updateHashStableText #-} 157 | updateHashStableText :: T.Text -> LH () 158 | updateHashStableText t = do 159 | updateHashStableTextData t 160 | updates <- hashUpdates 161 | ioInLH $ hu_updateULong updates (fromIntegral (T.length t)) 162 | 163 | instance LargeHashable T.Text where 164 | updateHash = updateHashText 165 | updateHashStable = updateHashStableText 166 | 167 | {-# INLINE updateHashLazyText #-} 168 | updateHashLazyText :: Int -> TL.Text -> LH () 169 | updateHashLazyText !len (TLI.Chunk !t !next) = do 170 | updateHashTextData t 171 | updateHashLazyText (len + T.length t) next 172 | updateHashLazyText !len TLI.Empty = updateHash len 173 | 174 | {-# INLINE updateHashStableLazyText #-} 175 | updateHashStableLazyText :: Int -> TL.Text -> LH () 176 | updateHashStableLazyText !len (TLI.Chunk !t !next) = do 177 | updateHashStableTextData t 178 | updateHashStableLazyText (len + T.length t) next 179 | updateHashStableLazyText !len TLI.Empty = updateHash len 180 | 181 | instance LargeHashable TL.Text where 182 | updateHash = updateHashLazyText 0 183 | updateHashStable = updateHashStableLazyText 0 184 | 185 | {-# INLINE updateHashByteStringData #-} 186 | updateHashByteStringData :: B.ByteString -> LH () 187 | updateHashByteStringData !b = do 188 | updates <- hashUpdates 189 | ioInLH $ do 190 | ptr <- B.useAsCString b return 191 | hu_updatePtr updates (castPtr ptr) (B.length b) 192 | 193 | {-# INLINE updateHashByteString #-} 194 | updateHashByteString :: B.ByteString -> LH () 195 | updateHashByteString !b = do 196 | updateHashByteStringData b 197 | updates <- hashUpdates 198 | ioInLH $ hu_updateULong updates (fromIntegral (B.length b)) 199 | 200 | instance LargeHashable B.ByteString where 201 | updateHash = updateHashByteString 202 | updateHashStable = updateHash 203 | 204 | {-# INLINE updateHashLazyByteString #-} 205 | updateHashLazyByteString :: Int -> BL.ByteString -> LH () 206 | updateHashLazyByteString !len (BLI.Chunk !bs !next) = do 207 | updateHashByteStringData bs 208 | updateHashLazyByteString (len + B.length bs) next 209 | updateHashLazyByteString !len BLI.Empty = updateHash len 210 | 211 | instance LargeHashable BL.ByteString where 212 | updateHash = updateHashLazyByteString 0 213 | updateHashStable = updateHash 214 | 215 | instance LargeHashable BS.ShortByteString where 216 | updateHash = updateHash . BS.fromShort 217 | updateHashStable = updateHash 218 | 219 | {-# INLINE updateHashWithFun #-} 220 | updateHashWithFun :: (HashUpdates -> a -> IO ()) -> a -> LH () 221 | updateHashWithFun f x = 222 | do updates <- hashUpdates 223 | ioInLH $ f updates x 224 | 225 | instance LargeHashable Int where 226 | updateHash = updateHashWithFun hu_updateULong . fromIntegral 227 | updateHashStable = updateHash 228 | 229 | instance LargeHashable Int8 where 230 | updateHash = updateHashWithFun hu_updateUChar . fromIntegral 231 | updateHashStable = updateHash 232 | 233 | instance LargeHashable Int16 where 234 | updateHash = updateHashWithFun hu_updateUShort . fromIntegral 235 | updateHashStable = updateHash 236 | 237 | instance LargeHashable Int32 where 238 | updateHash = updateHashWithFun hu_updateUInt . fromIntegral 239 | updateHashStable = updateHash 240 | 241 | instance LargeHashable Int64 where 242 | updateHash = updateHashWithFun hu_updateULong . fromIntegral 243 | updateHashStable = updateHash 244 | 245 | instance LargeHashable Word where 246 | updateHash = updateHashWithFun hu_updateULong . fromIntegral 247 | updateHashStable = updateHash 248 | 249 | instance LargeHashable Word8 where 250 | updateHash = updateHashWithFun hu_updateUChar 251 | updateHashStable = updateHash 252 | 253 | instance LargeHashable Word16 where 254 | updateHash = updateHashWithFun hu_updateUShort 255 | updateHashStable = updateHash 256 | 257 | instance LargeHashable Word32 where 258 | updateHash = updateHashWithFun hu_updateUInt 259 | updateHashStable = updateHash 260 | 261 | instance LargeHashable Word64 where 262 | updateHash = updateHashWithFun hu_updateULong . fromIntegral 263 | updateHashStable = updateHash 264 | 265 | instance LargeHashable CChar where 266 | updateHash (CChar i) = updateHashWithFun hu_updateUChar (fromIntegral i) 267 | updateHashStable = updateHash 268 | 269 | instance LargeHashable CShort where 270 | updateHash (CShort i) = updateHashWithFun hu_updateUShort (fromIntegral i) 271 | updateHashStable = updateHash 272 | 273 | instance LargeHashable CInt where 274 | updateHash (CInt i) = updateHashWithFun hu_updateUInt (fromIntegral i) 275 | updateHashStable = updateHash 276 | 277 | instance LargeHashable CLong where 278 | updateHash (CLong i) = updateHashWithFun hu_updateULong (fromIntegral i) 279 | updateHashStable = updateHash 280 | 281 | instance LargeHashable CUChar where 282 | updateHash (CUChar w) = updateHashWithFun hu_updateUChar w 283 | updateHashStable = updateHash 284 | 285 | instance LargeHashable CUShort where 286 | updateHash (CUShort w) = updateHashWithFun hu_updateUShort w 287 | updateHashStable = updateHash 288 | 289 | instance LargeHashable CUInt where 290 | updateHash (CUInt w) = updateHashWithFun hu_updateUInt w 291 | updateHashStable = updateHash 292 | 293 | instance LargeHashable CULong where 294 | updateHash (CULong w) = updateHashWithFun hu_updateULong (fromIntegral w) 295 | updateHashStable = updateHash 296 | 297 | instance LargeHashable Char where 298 | updateHash = updateHashWithFun hu_updateUInt . c2w 299 | updateHashStable = updateHash 300 | 301 | c2w :: Char -> Word32 302 | {-# INLINE c2w #-} 303 | c2w c = toEnum (ord c :: Int) 304 | 305 | {-# INLINE updateHashInteger #-} 306 | updateHashInteger :: Integer -> LH () 307 | updateHashInteger !i 308 | | i == 0 = updateHash (0 :: CUChar) 309 | | i > 0 = do 310 | updateHash (fromIntegral (i .&. 0xffffffffffffffff) :: CULong) 311 | updateHashInteger (shift i (-64)) 312 | | otherwise = do 313 | updateHash (0 :: CUChar) -- prepend 0 to show it is negative 314 | updateHashInteger (abs i) 315 | 316 | instance LargeHashable Integer where 317 | updateHash = updateHashInteger 318 | updateHashStable = updateHash 319 | 320 | foreign import ccall doubleToWord64 :: Double -> Word64 321 | 322 | instance LargeHashable Double where 323 | updateHash = updateHash . doubleToWord64 324 | updateHashStable = updateHash 325 | 326 | foreign import ccall floatToWord32 :: Float -> Word32 327 | 328 | instance LargeHashable Float where 329 | updateHash = updateHash . floatToWord32 330 | updateHashStable = updateHash 331 | 332 | {-# INLINE updateHashFixed #-} 333 | updateHashFixed :: HasResolution a => Fixed a -> LH () 334 | updateHashFixed f = updateHash (truncate . (* f) . fromInteger $ resolution f :: Integer) 335 | 336 | instance HasResolution a => LargeHashable (Fixed a) where 337 | updateHash = updateHashFixed 338 | updateHashStable = updateHash 339 | 340 | {-# INLINE updateHashBool #-} 341 | updateHashBool :: Bool -> LH () 342 | updateHashBool True = updateHash (1 :: CUChar) 343 | updateHashBool False = updateHash (0 :: CUChar) 344 | 345 | instance LargeHashable Bool where 346 | updateHash = updateHashBool 347 | updateHashStable = updateHash 348 | 349 | {-# INLINE updateHashList #-} 350 | updateHashList :: forall a. (a -> LH ()) -> [a] -> LH () 351 | updateHashList f = loop 0 352 | where 353 | loop :: Int -> [a] -> LH () 354 | loop !i [] = 355 | updateHash i 356 | loop !i (x:xs) = do 357 | f x 358 | loop (i + 1) xs 359 | 360 | instance LargeHashable a => LargeHashable [a] where 361 | updateHash = updateHashList updateHash 362 | updateHashStable = updateHashList updateHashStable 363 | 364 | {-# INLINE setFoldFun #-} 365 | setFoldFun :: LargeHashable a => (a -> LH ()) -> LH () -> a -> LH () 366 | setFoldFun f action value = action >> f value 367 | 368 | {-# INLINE updateHashSet #-} 369 | updateHashSet :: LargeHashable a => (a -> LH ()) -> S.Set a -> LH () 370 | updateHashSet !f !set = do 371 | foldl' (setFoldFun f) (return ()) set -- Note: foldl' for sets traverses the elements in asc order 372 | updateHash (S.size set) 373 | 374 | instance LargeHashable a => LargeHashable (S.Set a) where 375 | updateHash = updateHashSet updateHash 376 | updateHashStable = updateHashSet updateHashStable 377 | 378 | {-# INLINE updateHashIntSet #-} 379 | updateHashIntSet :: IntSet.IntSet -> LH () 380 | updateHashIntSet !set = do 381 | IntSet.foldl' (setFoldFun updateHash) (return ()) set 382 | updateHash (IntSet.size set) 383 | 384 | -- Lazy and Strict IntSet share the same definition 385 | instance LargeHashable IntSet.IntSet where 386 | updateHash = updateHashIntSet 387 | updateHashStable = updateHash 388 | 389 | {-# INLINE updateHashHashSet #-} 390 | updateHashHashSet :: LargeHashable a => (a -> LH ()) -> HashSet.HashSet a -> LH () 391 | updateHashHashSet !f !set = 392 | -- The ordering of elements in a set does not matter. A HashSet does not 393 | -- offer an efficient way of exctracting its elements in some specific 394 | -- ordering. So we use the auxiliary function 'hashListModuloOrdering'. 395 | hashListModuloOrdering f (HashSet.size set) (HashSet.toList set) 396 | 397 | -- | Hashes a list of values such the two permutations of the same list 398 | -- yields the same hash. 399 | hashListModuloOrdering :: LargeHashable a => (a -> LH ()) -> Int -> [a] -> LH () 400 | hashListModuloOrdering f len list = 401 | do updateXorHash (map f list) 402 | updateHash len 403 | 404 | -- Lazy and Strict HashSet share the same definition 405 | instance LargeHashable a => LargeHashable (HashSet.HashSet a) where 406 | updateHash = updateHashHashSet updateHash 407 | updateHashStable = updateHashHashSet updateHashStable 408 | 409 | {-# INLINE mapFoldFun #-} 410 | mapFoldFun :: (LargeHashable k, LargeHashable a) => 411 | (k -> LH ()) -> (a -> LH ()) -> LH () -> k -> a -> LH () 412 | mapFoldFun kf vf action key value = action >> kf key >> vf value 413 | 414 | {-# INLINE updateHashMap #-} 415 | updateHashMap :: (LargeHashable k, LargeHashable a) => 416 | (k -> LH ()) -> (a -> LH ()) -> M.Map k a -> LH () 417 | updateHashMap !kf !vf !m = do 418 | M.foldlWithKey' (mapFoldFun kf vf) (return ()) m 419 | updateHash (M.size m) 420 | 421 | -- Lazy and Strict Map share the same definition 422 | instance (LargeHashable k, LargeHashable a) => LargeHashable (M.Map k a) where 423 | updateHash = updateHashMap updateHash updateHash 424 | updateHashStable = updateHashMap updateHashStable updateHashStable 425 | 426 | {-# INLINE updateHashIntMap #-} 427 | updateHashIntMap :: LargeHashable a => (a -> LH ()) -> IntMap.IntMap a -> LH () 428 | updateHashIntMap !f !m = do 429 | IntMap.foldlWithKey' (mapFoldFun updateHash f) (return ()) m 430 | updateHash (IntMap.size m) 431 | 432 | -- Lazy and Strict IntMap share the same definition 433 | instance LargeHashable a => LargeHashable (IntMap.IntMap a) where 434 | updateHash = updateHashIntMap updateHash 435 | updateHashStable = updateHashIntMap updateHashStable 436 | 437 | updateHashHashMap :: (LargeHashable k, LargeHashable v) => 438 | ((k, v) -> LH ()) -> HashMap.HashMap k v -> LH () 439 | updateHashHashMap !f !m = 440 | -- The ordering of elements in a map do not matter. A HashMap does not 441 | -- offer an efficient way of exctracting its elements in some specific 442 | -- ordering. So we use the auxiliary function 'hashListModuloOrdering'. 443 | hashListModuloOrdering f (HashMap.size m) (HashMap.toList m) 444 | 445 | -- Lazy and Strict HashMap share the same definition 446 | instance (LargeHashable k, LargeHashable v) => LargeHashable (HashMap.HashMap k v) where 447 | updateHash = updateHashHashMap updateHash 448 | updateHashStable = updateHashHashMap updateHashStable 449 | 450 | instance (LargeHashable a, LargeHashable b) => LargeHashable (a, b) where 451 | updateHash (!a, !b) = updateHash a >> updateHash b 452 | updateHashStable (!a, !b) = updateHashStable a >> updateHashStable b 453 | 454 | instance (LargeHashable a, LargeHashable b, LargeHashable c) => LargeHashable (a, b, c) where 455 | updateHash (a, b, c) = updateHash a >> updateHash b >> updateHash c 456 | updateHashStable (a, b, c) = updateHashStable a >> updateHashStable b >> updateHashStable c 457 | 458 | instance (LargeHashable a, LargeHashable b, LargeHashable c, LargeHashable d) => LargeHashable (a, b, c, d) where 459 | updateHash (a, b, c, d) = updateHash a >> updateHash b >> updateHash c >> updateHash d 460 | updateHashStable (a, b, c, d) = 461 | updateHashStable a >> updateHashStable b >> updateHashStable c >> updateHashStable d 462 | 463 | instance (LargeHashable a, LargeHashable b, LargeHashable c, LargeHashable d, LargeHashable e) => LargeHashable (a, b, c, d, e) where 464 | updateHash (a, b, c, d, e) = 465 | updateHash a >> updateHash b >> updateHash c >> updateHash d >> updateHash e 466 | updateHashStable (a, b, c, d, e) = 467 | updateHashStable a >> updateHashStable b >> updateHashStable c >> updateHashStable d >> updateHashStable e 468 | 469 | instance LargeHashable a => LargeHashable (Maybe a) where 470 | updateHash Nothing = updateHash (0 :: CULong) 471 | updateHash (Just !x) = updateHash (1 :: CULong) >> updateHash x 472 | updateHashStable Nothing = updateHash (0 :: CULong) 473 | updateHashStable (Just !x) = updateHash (1 :: CULong) >> updateHashStable x 474 | 475 | instance (LargeHashable a, LargeHashable b) => LargeHashable (Either a b) where 476 | updateHash (Left !l) = updateHash (0 :: CULong) >> updateHash l 477 | updateHash (Right !r) = updateHash (1 :: CULong) >> updateHash r 478 | updateHashStable (Left !l) = updateHash (0 :: CULong) >> updateHashStable l 479 | updateHashStable (Right !r) = updateHash (1 :: CULong) >> updateHashStable r 480 | 481 | instance LargeHashable () where 482 | updateHash () = return () 483 | updateHashStable () = return () 484 | 485 | instance LargeHashable Ordering where 486 | updateHash EQ = updateHash (0 :: CULong) 487 | updateHash GT = updateHash (-1 :: CULong) 488 | updateHash LT = updateHash (1 :: CULong) 489 | updateHashStable = updateHash 490 | 491 | instance (Integral a, LargeHashable a) => LargeHashable (Ratio a) where 492 | updateHash !i = do 493 | updateHash $ numerator i 494 | updateHash $ denominator i 495 | updateHashStable = updateHash 496 | 497 | instance LargeHashable AbsoluteTime where 498 | updateHash t = updateHash $ diffAbsoluteTime t taiEpoch 499 | updateHashStable = updateHash 500 | 501 | instance LargeHashable DiffTime where 502 | -- could be replaced by diffTimeToPicoseconds as soon as 503 | -- time 1.6 becomes more common 504 | updateHash = updateHash . (fromRational . toRational :: DiffTime -> Pico) 505 | updateHashStable = updateHash 506 | 507 | instance LargeHashable NominalDiffTime where 508 | updateHash = updateHash . (fromRational . toRational :: NominalDiffTime -> Pico) 509 | updateHashStable = updateHash 510 | 511 | instance LargeHashable LocalTime where 512 | updateHash (LocalTime d tod) = updateHash d >> updateHash tod 513 | updateHashStable = updateHash 514 | 515 | instance LargeHashable ZonedTime where 516 | updateHash (ZonedTime lt tz) = updateHash lt >> updateHash tz 517 | updateHashStable = updateHash 518 | 519 | instance LargeHashable TimeOfDay where 520 | updateHash (TimeOfDay h m s) = updateHash h >> updateHash m >> updateHash s 521 | updateHashStable = updateHash 522 | 523 | instance LargeHashable TimeZone where 524 | updateHash (TimeZone mintz summerOnly name) = 525 | updateHash mintz >> updateHash summerOnly >> updateHash name 526 | updateHashStable (TimeZone mintz summerOnly name) = 527 | updateHashStable mintz >> updateHashStable summerOnly >> updateHashStable name 528 | 529 | instance LargeHashable UTCTime where 530 | updateHash (UTCTime d dt) = updateHash d >> updateHash dt 531 | updateHashStable = updateHash 532 | 533 | instance LargeHashable Day where 534 | updateHash (ModifiedJulianDay d) = updateHash d 535 | updateHashStable = updateHash 536 | 537 | instance LargeHashable UniversalTime where 538 | updateHash (ModJulianDate d) = updateHash d 539 | updateHashStable = updateHash 540 | 541 | instance LargeHashable a => LargeHashable (V.Vector a) where 542 | updateHash = updateHash . V.toList 543 | updateHashStable = updateHashStable . V.toList 544 | 545 | instance (LargeHashable a, LargeHashable b) => LargeHashable (Tuple.Pair a b) where 546 | updateHash (x Tuple.:!: y) = 547 | do updateHash x 548 | updateHash y 549 | updateHashStable (x Tuple.:!: y) = 550 | do updateHashStable x 551 | updateHashStable y 552 | 553 | instance LargeHashable Sci.Scientific where 554 | updateHash notNormalized = 555 | do let n = Sci.normalize notNormalized 556 | updateHash (Sci.coefficient n) 557 | updateHash (Sci.base10Exponent n) 558 | updateHashStable = updateHash 559 | 560 | updateHashJson :: (forall a . LargeHashable a => a -> LH ()) -> J.Value -> LH () 561 | updateHashJson f v = 562 | case v of 563 | J.Object obj -> 564 | do updateHash (0::Int) 565 | f obj 566 | J.Array arr -> 567 | do updateHash (1::Int) 568 | f arr 569 | J.String t -> 570 | do updateHash (2::Int) 571 | f t 572 | J.Number n -> 573 | do updateHash (3::Int) 574 | f n 575 | J.Bool b -> 576 | do updateHash (4::Int) 577 | f b 578 | J.Null -> 579 | updateHash (5::Int) 580 | 581 | instance LargeHashable J.Value where 582 | updateHash = updateHashJson updateHash 583 | updateHashStable = updateHashJson updateHashStable 584 | 585 | #if MIN_VERSION_aeson(2,0,0) 586 | instance LargeHashable J.Key where 587 | updateHash = updateHash . AesonKey.toText 588 | updateHashStable = updateHashStable . AesonKey.toText 589 | 590 | instance LargeHashable v => LargeHashable (AesonKeyMap.KeyMap v) where 591 | updateHash v = updateHash (AesonKeyMap.toHashMap v) 592 | updateHashStable v = updateHashStable (AesonKeyMap.toHashMap v) 593 | #endif 594 | 595 | instance LargeHashable Void where 596 | updateHash _ = error "I'm void" 597 | 598 | instance LargeHashable a => LargeHashable (Seq.Seq a) where 599 | updateHash = updateHash . F.toList 600 | updateHashStable = updateHashStable . F.toList 601 | 602 | genericUpdateHash :: (Generic a, GenericLargeHashable (Rep a)) => a -> LH () 603 | genericUpdateHash = updateHashGeneric . from 604 | {-# INLINE genericUpdateHash #-} 605 | 606 | genericUpdateHashStable :: (Generic a, GenericLargeHashable (Rep a)) => a -> LH () 607 | genericUpdateHashStable = updateHashStableGeneric . from 608 | {-# INLINE genericUpdateHashStable #-} 609 | 610 | -- | Support for generically deriving 'LargeHashable' instances. 611 | -- Any instance of the type class 'GHC.Generics.Generic' can be made 612 | -- an instance of 'LargeHashable' by an empty instance declaration. 613 | class GenericLargeHashable f where 614 | updateHashGeneric :: f p -> LH () 615 | updateHashStableGeneric :: f p -> LH () 616 | 617 | instance GenericLargeHashable V1 where 618 | {-# INLINE updateHashGeneric #-} 619 | updateHashGeneric = undefined 620 | updateHashStableGeneric = undefined 621 | 622 | instance GenericLargeHashable U1 where 623 | {-# INLINE updateHashGeneric #-} 624 | updateHashGeneric U1 = updateHash () 625 | updateHashStableGeneric U1 = updateHashStable () 626 | 627 | instance (GenericLargeHashable f, GenericLargeHashable g) => GenericLargeHashable (f :*: g) where 628 | {-# INLINE updateHashGeneric #-} 629 | updateHashGeneric (x :*: y) = updateHashGeneric x >> updateHashGeneric y 630 | updateHashStableGeneric (x :*: y) = updateHashStableGeneric x >> updateHashStableGeneric y 631 | 632 | instance (GenericLargeHashable f, GenericLargeHashableSum g) => GenericLargeHashable (f :+: g) where 633 | {-# INLINE updateHashGeneric #-} 634 | updateHashGeneric x = updateHashGenericSum x 0 635 | updateHashStableGeneric x = updateHashStableGenericSum x 0 636 | 637 | instance LargeHashable c => GenericLargeHashable (K1 i c) where 638 | {-# INLINE updateHashGeneric #-} 639 | updateHashGeneric x = updateHash (unK1 x) 640 | updateHashStableGeneric x = updateHashStable (unK1 x) 641 | 642 | -- ignore meta-info (for now) 643 | instance (GenericLargeHashable f) => GenericLargeHashable (M1 i t f) where 644 | {-# INLINE updateHashGeneric #-} 645 | updateHashGeneric x = updateHashGeneric (unM1 x) 646 | updateHashStableGeneric x = updateHashStableGeneric (unM1 x) 647 | 648 | class GenericLargeHashableSum (f :: Type -> Type) where 649 | updateHashGenericSum :: f p -> Int -> LH () 650 | updateHashStableGenericSum :: f p -> Int -> LH () 651 | 652 | instance (GenericLargeHashable f, GenericLargeHashableSum g) 653 | => GenericLargeHashableSum (f :+: g) where 654 | {-# INLINE updateHashGenericSum #-} 655 | updateHashGenericSum (L1 x) !p = do 656 | updateHash p 657 | updateHashGeneric x 658 | updateHashGenericSum (R1 x) !p = updateHashGenericSum x (p+1) 659 | updateHashStableGenericSum (L1 x) !p = do 660 | updateHashStable p 661 | updateHashStableGeneric x 662 | updateHashStableGenericSum (R1 x) !p = updateHashStableGenericSum x (p+1) 663 | 664 | instance (GenericLargeHashable f) => GenericLargeHashableSum (M1 i t f) where 665 | {-# INLINE updateHashGenericSum #-} 666 | updateHashGenericSum x !p = do 667 | updateHash p 668 | updateHashGeneric (unM1 x) 669 | updateHashStableGenericSum x !p = do 670 | updateHashStable p 671 | updateHashStableGeneric (unM1 x) 672 | -------------------------------------------------------------------------------- /src/Data/LargeHashable/Endianness.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | module Data.LargeHashable.Endianness ( 3 | 4 | Endianness(..), systemEndianness 5 | 6 | ) where 7 | 8 | -- taken from the cpu package (last release 2013) 9 | 10 | -- | represent the CPU endianness 11 | -- 12 | -- Big endian system stores bytes with the MSB as the first byte. 13 | -- Little endian system stores bytes with the LSB as the first byte. 14 | -- 15 | -- middle endian is purposely avoided. 16 | data Endianness = LittleEndian 17 | | BigEndian 18 | deriving (Show,Eq) 19 | 20 | -- | return the system endianness 21 | systemEndianness :: Endianness 22 | #ifdef WORDS_BIGENDIAN 23 | systemEndianness = BigEndian 24 | #else 25 | systemEndianness = LittleEndian 26 | #endif 27 | -------------------------------------------------------------------------------- /src/Data/LargeHashable/Intern.hs: -------------------------------------------------------------------------------- 1 | -- | Generic, low-level data types for hashing. This is an internal module. 2 | -- 3 | -- You should only import this module if you write your own hash algorithm 4 | -- or if you need access to low-level hashing functions when defining 5 | -- instances of 'LargeHash'. 6 | -- 7 | -- Regular users should not import this module. Import 'Data.LargeHashable' 8 | -- instead. 9 | {-# LANGUAGE BangPatterns #-} 10 | {-# LANGUAGE DeriveGeneric #-} 11 | {-# LANGUAGE GeneralizedNewtypeDeriving #-} 12 | module Data.LargeHashable.Intern ( 13 | 14 | HashUpdates(..), HashAlgorithm(..), LH 15 | , hashUpdates, ioInLH, runLH, updateXorHash 16 | 17 | ) where 18 | 19 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines") 20 | import Control.Monad 21 | import Data.Word 22 | import Foreign.Ptr 23 | import System.IO.Unsafe (unsafePerformIO) 24 | 25 | -- | Functions for updating an intermediate hash value. The functions live 26 | -- in the 'IO' monad because they are typically implemented via FFI. 27 | data HashUpdates 28 | = HashUpdates 29 | { hu_updatePtr :: {-# NOUNPACK #-} !(Ptr Word8 -> Int -> IO ()) -- ^ adds a byte array to the hash 30 | , hu_updateUChar :: {-# NOUNPACK #-} !(Word8 -> IO ()) -- Word8 31 | , hu_updateUShort :: {-# NOUNPACK #-} !(Word16 -> IO ()) -- Word16 32 | , hu_updateUInt :: {-# NOUNPACK #-} !(Word32 -> IO ()) -- Word32 33 | , hu_updateULong :: {-# NOUNPACK #-} !(Word64 -> IO ()) -- Word64 34 | } 35 | 36 | -- | The interface for a hashing algorithm. The interface contains a simple run 37 | -- function, which is used to update the hash with all values needed, and the 38 | -- outputs the resulting hash. 39 | data HashAlgorithm h 40 | = HashAlgorithm 41 | { ha_run :: {-# NOUNPACK #-} !((HashUpdates -> IO ()) -> IO h) 42 | , ha_xor :: {-# NOUNPACK #-} !(h -> h -> h) 43 | , ha_updateHash :: {-# NOUNPACK #-} !(HashUpdates -> h -> IO ()) 44 | } 45 | 46 | data LHEnv 47 | = LHEnv 48 | { lh_updates :: {-# NOUNPACK #-} !HashUpdates 49 | , lh_updateXorHash :: {-# NOUNPACK #-} !([LH ()] -> IO ()) 50 | } 51 | 52 | -- | The 'LH' monad (`LH` stands for "large hash") is used in the definition of 53 | -- hashing functions for arbitrary data types. 54 | newtype LH a = LH (LHEnv -> IO a) 55 | 56 | {-# INLINE lhFmap #-} 57 | lhFmap :: (a -> b) -> LH a -> LH b 58 | lhFmap f (LH x) = 59 | LH $ \env -> 60 | do y <- x env 61 | return (f y) 62 | 63 | {-# INLINE lhReturn #-} 64 | lhReturn :: a -> LH a 65 | lhReturn x = LH $ \_env -> return x 66 | 67 | {-# INLINE lhApp #-} 68 | lhApp :: LH (a -> b) -> LH a -> LH b 69 | lhApp (LH f) (LH x) = 70 | LH $ \env -> f env <*> x env 71 | 72 | {-# INLINE lhBind #-} 73 | lhBind :: LH a -> (a -> LH b) -> LH b 74 | lhBind (LH x) f = 75 | LH $ \env -> 76 | do y <- x env 77 | let (LH g) = f y 78 | g env 79 | 80 | {-# INLINE lhBind' #-} 81 | lhBind' :: LH a -> LH b -> LH b 82 | lhBind' (LH x) (LH y) = 83 | LH $ \env -> 84 | do _ <- x env 85 | y env 86 | 87 | instance Functor LH where 88 | fmap = lhFmap 89 | 90 | instance Applicative LH where 91 | pure = lhReturn 92 | (<*>) = lhApp 93 | (*>) = lhBind' 94 | 95 | instance Monad LH where 96 | (>>=) = lhBind 97 | 98 | {-# INLINE hashUpdates #-} 99 | hashUpdates :: LH HashUpdates 100 | hashUpdates = 101 | LH $ \env -> return (lh_updates env) 102 | 103 | {-# INLINE getUpdateXorHash #-} 104 | getUpdateXorHash :: LH ([LH ()] -> IO ()) 105 | getUpdateXorHash = 106 | LH $ \env -> return (lh_updateXorHash env) 107 | 108 | -- | Perform an 'IO' action in the 'LH' monad. Use with care, do not perform 109 | -- arbitrary 'IO' operation with this function! Only use it for calling 110 | -- functions of the 'HashUpdates' datatype. 111 | {-# INLINE ioInLH #-} 112 | ioInLH :: IO a -> LH a 113 | ioInLH io = 114 | LH $ \_env -> io 115 | 116 | -- | Runs a 'LH' computation and returns the resulting hash. 117 | {-# NOINLINE runLH #-} 118 | runLH :: HashAlgorithm h -> LH () -> h 119 | runLH alg lh = 120 | unsafePerformIO (runLH' alg lh) 121 | 122 | runLH' :: HashAlgorithm h -> LH () -> IO h 123 | runLH' alg (LH lh) = 124 | ha_run alg fun 125 | where 126 | fun updates = 127 | lh (LHEnv updates (updateXor updates)) 128 | updateXor updates actions = 129 | do mh <- foldM foldFun Nothing actions 130 | case mh of 131 | Just h -> ha_updateHash alg updates h 132 | Nothing -> return () 133 | foldFun mh action = 134 | do h2 <- runLH' alg action 135 | case mh of 136 | Nothing -> return (Just h2) 137 | Just h1 -> 138 | let !h = ha_xor alg h1 h2 139 | in return (Just h) 140 | 141 | updateXorHash :: [LH ()] -> LH () 142 | updateXorHash actions = 143 | do f <- getUpdateXorHash 144 | ioInLH (f actions) 145 | -------------------------------------------------------------------------------- /src/Data/LargeHashable/LargeWord.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable #-} 2 | {-# LANGUAGE DeriveGeneric #-} 3 | -- | Efficient representation for small bytearrays with 128 or 256 bits. 4 | module Data.LargeHashable.LargeWord 5 | ( Word128(..), Word256(..) 6 | , bsToW128, w128ToBs 7 | , bsToW256, w256ToBs 8 | , xorW128, xorW256 9 | ) 10 | where 11 | 12 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines") 13 | import Data.Bits 14 | import Data.Data (Data) 15 | import Data.Typeable 16 | import Data.Word 17 | import GHC.Generics (Generic) 18 | import qualified Data.ByteString as BS 19 | 20 | data Word128 21 | = Word128 22 | { w128_first :: !Word64 23 | , w128_second :: !Word64 24 | } 25 | deriving (Show, Read, Eq, Ord, Typeable, Generic, Data) 26 | 27 | data Word256 28 | = Word256 29 | { w256_first :: !Word128 30 | , w256_second :: !Word128 31 | } 32 | deriving (Show, Read, Eq, Ord, Typeable, Generic, Data) 33 | 34 | -- | Converts a 'ByteString' into a 'Word256'. Only the first 32 bytes 35 | -- are taken into account, the rest is ignored. 36 | bsToW256 :: BS.ByteString -> Word256 37 | bsToW256 bs = Word256 first128 next128 38 | where 39 | first128 = bsToW128 bs 40 | next128 = bsToW128 (BS.drop 16 bs) 41 | 42 | w256ToBs :: Word256 -> BS.ByteString 43 | w256ToBs (Word256 first128 next128) = 44 | w128ToBs first128 `BS.append` w128ToBs next128 45 | 46 | -- | Converts a 'ByteString' into a 'Word128'. Only the first 16 bytes 47 | -- are taken into account, the rest is ignored. 48 | bsToW128 :: BS.ByteString -> Word128 49 | bsToW128 bs = Word128 first64 next64 50 | where 51 | first64 = bsToW64 bs 52 | next64 = bsToW64 (BS.drop 8 bs) 53 | 54 | w128ToBs :: Word128 -> BS.ByteString 55 | w128ToBs (Word128 first64 next64) = 56 | w64ToBs first64 `BS.append` w64ToBs next64 57 | 58 | w64ToBs :: Word64 -> BS.ByteString 59 | w64ToBs w64 = 60 | BS.pack 61 | [ fromIntegral (w64 `shiftR` 56 .&. 255) 62 | , fromIntegral (w64 `shiftR` 48 .&. 255) 63 | , fromIntegral (w64 `shiftR` 40 .&. 255) 64 | , fromIntegral (w64 `shiftR` 32 .&. 255) 65 | , fromIntegral (w64 `shiftR` 24 .&. 255) 66 | , fromIntegral (w64 `shiftR` 16 .&. 255) 67 | , fromIntegral (w64 `shiftR` 8 .&. 255) 68 | , fromIntegral (w64 .&. 255) 69 | ] 70 | 71 | bsToW64 :: BS.ByteString -> Word64 72 | bsToW64 = BS.foldl' (\x w8 -> (x `shiftL` 8) + fromIntegral w8) 0 . BS.take 8 73 | 74 | xorW128 :: Word128 -> Word128 -> Word128 75 | xorW128 (Word128 w11 w12) (Word128 w21 w22) = Word128 (w11 `xor` w21) (w12 `xor` w22) 76 | 77 | xorW256 :: Word256 -> Word256 -> Word256 78 | xorW256 (Word256 w11 w12) (Word256 w21 w22) = Word256 (w11 `xorW128` w21) (w12 `xorW128` w22) 79 | -------------------------------------------------------------------------------- /src/Data/LargeHashable/MD5.hs: -------------------------------------------------------------------------------- 1 | -- | An implementation of 'HashAlgorithm' for MD5 (https://www.ietf.org/rfc/rfc1321.txt). 2 | {-# LANGUAGE BangPatterns #-} 3 | {-# LANGUAGE ScopedTypeVariables #-} 4 | {-# LANGUAGE CApiFFI #-} 5 | module Data.LargeHashable.MD5 ( 6 | 7 | MD5Hash(..), md5HashAlgorithm, runMD5 8 | 9 | ) where 10 | 11 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines") 12 | import Data.LargeHashable.Intern 13 | import Data.LargeHashable.LargeWord 14 | import Data.Word 15 | import Foreign.Marshal.Alloc 16 | import Foreign.Ptr 17 | import Foreign.Storable 18 | import qualified Data.ByteString.Base16 as Base16 19 | import qualified Data.ByteString.Char8 as BSC 20 | 21 | newtype MD5Hash = MD5Hash { unMD5Hash :: Word128 } 22 | deriving (Eq, Ord) 23 | 24 | instance Show MD5Hash where 25 | show (MD5Hash w) = 26 | BSC.unpack (Base16.encode (w128ToBs w)) 27 | 28 | instance Read MD5Hash where 29 | -- readsPrec :: Read a => Int -> String -> [(a, String)] 30 | readsPrec _ s = 31 | let n = 32 32 | (prefix, suffix) = splitAt n s 33 | in 34 | if length prefix /= n 35 | then [] 36 | else 37 | case Base16.decode (BSC.pack prefix) of 38 | Left _ -> [] 39 | Right bs -> [(MD5Hash (bsToW128 bs), suffix)] 40 | 41 | foreign import capi unsafe "md5.h md5_init" 42 | c_md5_init :: Ptr RawCtx -> IO () 43 | 44 | foreign import capi unsafe "md5.h md5_update" 45 | c_md5_update :: Ptr RawCtx -> Ptr Word8 -> Int -> IO () 46 | 47 | foreign import capi unsafe "md5.h md5_update_uchar" 48 | c_md5_update_uchar :: Ptr RawCtx -> Word8 -> IO () 49 | 50 | foreign import capi unsafe "md5.h md5_update_ushort" 51 | c_md5_update_ushort :: Ptr RawCtx -> Word16 -> IO () 52 | 53 | foreign import capi unsafe "md5.h md5_update_uint" 54 | c_md5_update_uint :: Ptr RawCtx -> Word32 -> IO () 55 | 56 | foreign import capi unsafe "md5.h md5_update_ulong" 57 | c_md5_update_ulong :: Ptr RawCtx -> Word64 -> IO () 58 | 59 | foreign import capi unsafe "md5.h md5_finalize" 60 | c_md5_finalize :: Ptr RawCtx -> Ptr Word8 -> IO () 61 | 62 | {-# INLINE digestSize #-} 63 | digestSize :: Int 64 | digestSize = 16 65 | 66 | {-# INLINE sizeCtx #-} 67 | sizeCtx :: Int 68 | sizeCtx = 96 69 | 70 | data RawCtx -- phantom type argument 71 | 72 | newtype Ctx = Ctx { _unCtx :: Ptr RawCtx } 73 | 74 | withCtx :: (Ctx -> IO ()) -> IO MD5Hash 75 | withCtx f = 76 | allocaBytes sizeCtx $ \(ptr :: Ptr RawCtx) -> 77 | do c_md5_init ptr 78 | f (Ctx ptr) 79 | allocaBytes digestSize $ \(resPtr :: Ptr Word8) -> 80 | do c_md5_finalize ptr resPtr 81 | let first = castPtr resPtr :: Ptr Word64 82 | w1 <- peek first 83 | let second = castPtr (plusPtr resPtr (sizeOf w1)) :: Ptr Word64 84 | w2 <- peek second 85 | return (MD5Hash (Word128 w1 w2)) 86 | 87 | md5HashAlgorithm :: HashAlgorithm MD5Hash 88 | md5HashAlgorithm = 89 | HashAlgorithm 90 | { ha_run = run 91 | , ha_xor = xorMD5 92 | , ha_updateHash = updateHash 93 | } 94 | where 95 | xorMD5 (MD5Hash h1) (MD5Hash h2) = MD5Hash (h1 `xorW128` h2) 96 | updateHash updates (MD5Hash h) = 97 | let f = hu_updateULong updates 98 | in do f (w128_first h) 99 | f (w128_second h) 100 | run f = 101 | withCtx $ \(Ctx ctxPtr) -> 102 | let !updates = 103 | HashUpdates 104 | { hu_updatePtr = c_md5_update ctxPtr 105 | , hu_updateUChar = c_md5_update_uchar ctxPtr 106 | , hu_updateUShort = c_md5_update_ushort ctxPtr 107 | , hu_updateUInt = c_md5_update_uint ctxPtr 108 | , hu_updateULong = c_md5_update_ulong ctxPtr 109 | } 110 | in f updates 111 | 112 | runMD5 :: LH () -> MD5Hash 113 | runMD5 = runLH md5HashAlgorithm 114 | -------------------------------------------------------------------------------- /src/Data/LargeHashable/TH.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | {-# LANGUAGE QuasiQuotes #-} 3 | {-# LANGUAGE TemplateHaskell #-} 4 | module Data.LargeHashable.TH ( 5 | 6 | deriveLargeHashable, deriveLargeHashableCtx, deriveLargeHashableNoCtx 7 | , deriveLargeHashableCustomCtx 8 | 9 | ) where 10 | 11 | import Control.Arrow (first) 12 | import Control.Monad (forM) 13 | import Data.LargeHashable.Class 14 | import Data.Word 15 | import Language.Haskell.TH 16 | 17 | -- | Template Haskell function to automatically derive 18 | -- instances of 'LargeHashable'. The derived instances first 19 | -- calls 'updateHash' with an unique identifier number for 20 | -- every constructor, followed by 'updateHash' calls for every 21 | -- field of the constructor (if existent). It also works for 22 | -- type families. 23 | -- 24 | -- E. g. for the following code 25 | -- 26 | -- @ 27 | -- 28 | -- data BlaFoo a = Foo 29 | -- | Bar Int a 30 | -- | Baz a a 31 | -- 32 | -- $(deriveLargeHashable ''BlaFoo) 33 | -- @ 34 | -- 35 | -- The following instance gets generated: 36 | -- 37 | -- @ 38 | -- instance LargeHashable a_apg8 => 39 | -- LargeHashable (BlaFoo a_apg8) where 40 | -- updateHash Foo = updateHash (0 :: Foreign.C.Types.CULong) 41 | -- updateHash (Bar a b) 42 | -- = (((updateHash (1 :: Foreign.C.Types.CULong)) >> (updateHash a)) 43 | -- >> (updateHash b)) 44 | -- updateHash (XY a b) 45 | -- = (((updateHash (2 :: Foreign.C.Types.CULong)) >> (updateHash a)) 46 | -- >> (updateHash b)) 47 | -- @ 48 | deriveLargeHashable :: Name -> Q [Dec] 49 | deriveLargeHashable n = reify n >>= \info -> 50 | case info of 51 | TyConI dec -> 52 | case dec of 53 | #if MIN_VERSION_template_haskell(2,11,0) 54 | DataD context name tyvars _ cons _ -> 55 | #else 56 | DataD context name tyvars cons _ -> 57 | #endif 58 | buildInstance (ConT name) context tyvars cons 59 | 60 | #if MIN_VERSION_template_haskell(2,11,0) 61 | NewtypeD context name tyvars _ con _ -> 62 | #else 63 | NewtypeD context name tyvars con _ -> 64 | #endif 65 | buildInstance (ConT name) context tyvars [con] 66 | _ -> fail $ notDeriveAbleErrorMsg n info 67 | FamilyI _ instDecs -> fmap concat $ forM instDecs $ \instDec -> 68 | case instDec of 69 | #if MIN_VERSION_template_haskell(2,15,0) 70 | DataInstD context _ ty _ cons _ -> 71 | #elif MIN_VERSION_template_haskell(2,11,0) 72 | DataInstD context name types _ cons _ -> let ty = foldl AppT (ConT name) types in 73 | #else 74 | DataInstD context name types cons _ -> let ty = foldl AppT (ConT name) types in 75 | #endif 76 | buildInstance ty context [] cons 77 | #if MIN_VERSION_template_haskell(2,15,0) 78 | NewtypeInstD context _ ty _ con _ -> 79 | #elif MIN_VERSION_template_haskell(2,11,0) 80 | NewtypeInstD context name types _ con _ -> let ty = foldl AppT (ConT name) types in 81 | #else 82 | NewtypeInstD context name types con _ -> let ty = foldl AppT (ConT name) types in 83 | #endif 84 | buildInstance ty context [] [con] 85 | _ -> fail $ notDeriveAbleErrorMsg n info 86 | _ -> fail $ notDeriveAbleErrorMsg n info 87 | 88 | -- | Derive a 'LargeHashable' instance with extra constraints in the 89 | -- context of the instance. 90 | deriveLargeHashableCtx :: 91 | Name 92 | -> ([TypeQ] -> [PredQ]) 93 | -- ^ Function mapping the type variables in the instance head to the additional constraints 94 | -> Q [Dec] 95 | deriveLargeHashableCtx tyName extraPreds = 96 | deriveLargeHashableCustomCtx tyName mkCtx 97 | where 98 | mkCtx args oldCtx = 99 | oldCtx ++ extraPreds args 100 | 101 | -- | Derive a 'LargeHashable' instance with no constraints in the context of the instance. 102 | deriveLargeHashableNoCtx :: 103 | Name 104 | -> (Q [Dec]) 105 | deriveLargeHashableNoCtx tyName = 106 | deriveLargeHashableCustomCtx tyName (\_ _ -> []) 107 | 108 | -- | Derive a 'LargeHashable' instance with a completely custom instance context. 109 | deriveLargeHashableCustomCtx :: 110 | Name 111 | -> ([TypeQ] -> [PredQ] -> [PredQ]) 112 | -- ^ Function mapping the type variables in the instance head and the 113 | -- constraints that would normally be generated to the constraints 114 | -- that should be generated. 115 | -> (Q [Dec]) 116 | deriveLargeHashableCustomCtx tyName extraPreds = 117 | do decs <- deriveLargeHashable tyName 118 | case decs of 119 | #if MIN_VERSION_template_haskell(2,11,0) 120 | (InstanceD overlap ctx ty body : _) -> 121 | #else 122 | (InstanceD ctx ty body : _) -> 123 | #endif 124 | do let args = reverse (collectArgs ty) 125 | newCtx <- sequence (extraPreds (map return args) (map return ctx)) 126 | -- _ <- fail ("args: " ++ show args ++", ty: " ++ show ty) 127 | #if MIN_VERSION_template_haskell(2,11,0) 128 | return [InstanceD overlap newCtx ty body] 129 | #else 130 | return [InstanceD newCtx ty body] 131 | #endif 132 | _ -> 133 | error $ 134 | "Unexpected declarations returned by deriveLargeHashable: " ++ show (ppr decs) 135 | where 136 | collectArgs :: Type -> [Type] 137 | collectArgs outerTy = 138 | let loop ty = 139 | case ty of 140 | (AppT l r) -> 141 | case l of 142 | AppT _ _ -> r : loop l 143 | _ -> [r] 144 | _ -> [] 145 | in case outerTy of 146 | AppT _ r -> loop r 147 | _ -> [] 148 | 149 | -- | Generates the error message displayed when somebody tries to let us 150 | -- derive impossible instances! 151 | notDeriveAbleErrorMsg :: Name -> Info -> String 152 | notDeriveAbleErrorMsg name info = "Could not derive LargeHashable instance for " 153 | ++ (show name) ++ "(" ++ (show info) ++ "). If you think this should be possible, file an issue." 154 | 155 | -- | After 'deriveLargeHashable' has matched all the important information 156 | -- this function gets called to build the instance declaration. 157 | #if MIN_VERSION_template_haskell(2,17,0) 158 | buildInstance :: Type -> Cxt -> [TyVarBndr f] -> [Con] -> Q [Dec] 159 | #else 160 | buildInstance :: Type -> Cxt -> [TyVarBndr] -> [Con] -> Q [Dec] 161 | #endif 162 | buildInstance basicType context vars cons = 163 | let consWithIds = zip [0..] cons 164 | constraints = makeConstraints context vars 165 | typeWithVars = foldl appT (return basicType) $ map (varT . varName) vars 166 | in (:[]) <$> instanceD constraints (conT ''LargeHashable `appT` typeWithVars) 167 | [updateHashDeclaration 'updateHash consWithIds, 168 | updateHashDeclaration 'updateHashStable consWithIds] 169 | 170 | -- | This function generates the declaration for the 'updateHash' and the 171 | -- 'updateHashStable functions 172 | -- of the 'LargeHashable' typeclass. By taking the constructors with there 173 | -- selected IDs and calling 'updateHashClause' for everyone of them to generate 174 | -- the corresponding clause. 175 | updateHashDeclaration :: Name -> [(Integer, Con)] -> Q Dec 176 | updateHashDeclaration name [(_, con)] = 177 | funD name [updateHashClause name Nothing con] 178 | updateHashDeclaration name consWIds = 179 | funD name (map (uncurry (updateHashClause name) . first Just) consWIds) 180 | 181 | -- | 'updateHashClause' generates a clause of the 'updateHash' function. 182 | -- It makes sure all the fields are matched correctly and updates the hash 183 | -- with the neccessary information about the constructor (its ID) and all 184 | -- of its fields. 185 | updateHashClause :: Name -> Maybe Integer -> Con -> Q Clause 186 | updateHashClause name mI con = 187 | clause [return patOfClause] 188 | (normalB $ 189 | foldl sequenceExps 190 | conMarker 191 | hashUpdatesOfConFields) 192 | [] 193 | where conMarker = case mI of 194 | Just i -> [| updateHash ($(litE . IntegerL $ i) :: Word64) |] 195 | Nothing -> [| return () |] 196 | hashUpdatesOfConFields = map (\pn -> [| $(varE name) $(varE pn) |]) patVarNames 197 | -- Extract the names of all the 198 | -- pattern variables from usedPat. 199 | patVarNames = case patOfClause of 200 | #if MIN_VERSION_template_haskell(2,18,0) 201 | ConP _ _ vars -> map (\(VarP v) -> v) vars 202 | #else 203 | ConP _ vars -> map (\(VarP v) -> v) vars 204 | #endif 205 | InfixP (VarP v1) _ (VarP v2) -> [v1, v2] 206 | _ -> error "Pattern in patVarNames not matched!" 207 | patOfClause = patternForCon con 208 | 209 | -- | Generate a Pattern that matches the supplied constructor 210 | -- and all of its fields. 211 | patternForCon :: Con -> Pat 212 | patternForCon con = case con of 213 | NormalC n types -> conP n $ uniqueVarPats (length types) 214 | RecC n varTypes -> conP n $ uniqueVarPats (length varTypes) 215 | InfixC _ n _ -> InfixP (VarP . mkName $ "x") n (VarP . mkName $ "y") 216 | c@(ForallC{}) -> error $ "Cannot derive quantified type as it would potentially violate uniqueness: "++ show c 217 | #if MIN_VERSION_template_haskell(2,11,0) 218 | GadtC [n] types _ -> conP n $ uniqueVarPats (length types) 219 | RecGadtC [n] varTypes _ -> conP n $ uniqueVarPats (length varTypes) 220 | _ -> error $ "Constructor not supported: "++show con 221 | #endif 222 | where 223 | uniqueVarPats n = take n . map (VarP . mkName) $ names 224 | conP n = 225 | #if MIN_VERSION_template_haskell(2,18,0) 226 | ConP n [] 227 | #else 228 | ConP n 229 | #endif 230 | 231 | 232 | -- | Sequences two Expressions using the '(>>)' operator. 233 | sequenceExps :: Q Exp -> Q Exp -> Q Exp 234 | sequenceExps first second = infixE (Just first) (varE '(>>)) (Just second) 235 | 236 | -- | Generates the constraints needed for the declaration of 237 | -- the 'LargeHashable' class. This means that the constraint 238 | -- @LargeHashable $TypeVar$@ is added for every type variable 239 | -- the type has. 240 | #if MIN_VERSION_template_haskell(2,17,0) 241 | makeConstraints :: Cxt -> [TyVarBndr f] -> Q Cxt 242 | #else 243 | makeConstraints :: Cxt -> [TyVarBndr] -> Q Cxt 244 | #endif 245 | makeConstraints context vars = return $ context ++ 246 | map (\v -> (ConT (toLargeHashableClass v)) `AppT` (VarT . varName $ v)) vars 247 | where 248 | #if MIN_VERSION_template_haskell(2,17,0) 249 | toLargeHashableClass :: TyVarBndr f -> Name 250 | toLargeHashableClass var = 251 | case var of 252 | (PlainTV _ _) -> ''LargeHashable 253 | (KindedTV _ _ (AppT (AppT ArrowT StarT) StarT)) -> ''LargeHashable' 254 | (KindedTV _ _ _) -> ''LargeHashable 255 | #else 256 | toLargeHashableClass :: TyVarBndr -> Name 257 | toLargeHashableClass var = 258 | case var of 259 | (PlainTV _) -> ''LargeHashable 260 | (KindedTV _ (AppT (AppT ArrowT StarT) StarT)) -> ''LargeHashable' 261 | (KindedTV _ _) -> ''LargeHashable 262 | #endif 263 | 264 | -- | Returns the 'Name' for a type variable. 265 | #if MIN_VERSION_template_haskell(2,17,0) 266 | varName :: TyVarBndr f -> Name 267 | varName (PlainTV n _) = n 268 | varName (KindedTV n _ _) = n 269 | #else 270 | varName :: TyVarBndr -> Name 271 | varName (PlainTV n) = n 272 | varName (KindedTV n _) = n 273 | #endif 274 | 275 | -- | An infinite list of unique names that 276 | -- are used in the generations of patterns. 277 | names :: [String] 278 | names = concat $ map (gen (map (:[]) ['a'..'z'])) [0..] 279 | where gen :: [String] -> Integer -> [String] 280 | gen acc 0 = acc 281 | gen acc n = gen (concat $ map (\q -> map (\c -> c : q) ['a'..'z']) acc) (n - 1) 282 | -------------------------------------------------------------------------------- /stack-ghc-9.2.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-20.21 2 | require-stack-version: ">= 1.0.0" 3 | 4 | packages: 5 | - '.' 6 | 7 | flags: {} 8 | 9 | extra-deps: 10 | - crypton-1.0.1@sha256:f41316fbc6ad878396e476355e27b70ac35c344d74e3eefafe709e03b192be9e,14527 11 | -------------------------------------------------------------------------------- /stack-ghc-9.2.yaml.lock: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by Stack. 2 | # You should not edit this file by hand. 3 | # For more information, please see the documentation at: 4 | # https://docs.haskellstack.org/en/stable/lock_files 5 | 6 | packages: 7 | - completed: 8 | hackage: crypton-1.0.1@sha256:f41316fbc6ad878396e476355e27b70ac35c344d74e3eefafe709e03b192be9e,14527 9 | pantry-tree: 10 | sha256: b8f0a33755a0871d325300024592cd9306e393941248184e9c4faf417d4adfcd 11 | size: 23276 12 | original: 13 | hackage: crypton-1.0.1@sha256:f41316fbc6ad878396e476355e27b70ac35c344d74e3eefafe709e03b192be9e,14527 14 | snapshots: 15 | - completed: 16 | sha256: 401a0e813162ba62f04517f60c7d25e93a0f867f94a902421ebf07d1fb5a8c46 17 | size: 650044 18 | url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/20/21.yaml 19 | original: lts-20.21 20 | -------------------------------------------------------------------------------- /stack-ghc-9.4.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-21.25 2 | require-stack-version: ">= 1.0.0" 3 | 4 | packages: 5 | - '.' 6 | 7 | flags: {} 8 | -------------------------------------------------------------------------------- /stack-ghc-9.4.yaml.lock: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by Stack. 2 | # You should not edit this file by hand. 3 | # For more information, please see the documentation at: 4 | # https://docs.haskellstack.org/en/stable/lock_files 5 | 6 | packages: [] 7 | snapshots: 8 | - completed: 9 | sha256: 0a8278a8c9f33fbdaa40167e035065a7ca8ed61d89fec21236b888c470dab3f0 10 | size: 625218 11 | url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/nightly/2023/5/8.yaml 12 | original: nightly-2023-05-08 13 | -------------------------------------------------------------------------------- /stack-ghc-9.6.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-22.34 2 | require-stack-version: ">= 1.0.0" 3 | 4 | packages: 5 | - '.' 6 | 7 | flags: {} 8 | -------------------------------------------------------------------------------- /stack-ghc-9.6.yaml.lock: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by Stack. 2 | # You should not edit this file by hand. 3 | # For more information, please see the documentation at: 4 | # https://docs.haskellstack.org/en/stable/lock_files 5 | 6 | packages: [] 7 | snapshots: 8 | - completed: 9 | sha256: edbd50d7e7c85c13ad5f5835ae2db92fab1e9cf05ecf85340e2622ec0a303df1 10 | size: 720020 11 | url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/22/34.yaml 12 | original: lts-22.34 13 | -------------------------------------------------------------------------------- /stack-ghc-9.8.yaml: -------------------------------------------------------------------------------- 1 | resolver: lts-23.4 2 | require-stack-version: ">= 1.0.0" 3 | 4 | packages: 5 | - '.' 6 | 7 | flags: {} 8 | -------------------------------------------------------------------------------- /stack-ghc-9.8.yaml.lock: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by Stack. 2 | # You should not edit this file by hand. 3 | # For more information, please see the documentation at: 4 | # https://docs.haskellstack.org/en/stable/lock_files 5 | 6 | packages: [] 7 | snapshots: 8 | - completed: 9 | sha256: 0d61fd2be255f5c425cd92dbb4a78d1f70af2c138f3ec921e98b97ae182b044c 10 | size: 679291 11 | url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/23/4.yaml 12 | original: lts-23.4 13 | -------------------------------------------------------------------------------- /stack.yaml: -------------------------------------------------------------------------------- 1 | stack-ghc-9.6.yaml -------------------------------------------------------------------------------- /stack.yaml.lock: -------------------------------------------------------------------------------- 1 | # This file was autogenerated by Stack. 2 | # You should not edit this file by hand. 3 | # For more information, please see the documentation at: 4 | # https://docs.haskellstack.org/en/stable/lock_files 5 | 6 | packages: [] 7 | snapshots: 8 | - completed: 9 | sha256: edbd50d7e7c85c13ad5f5835ae2db92fab1e9cf05ecf85340e2622ec0a303df1 10 | size: 720020 11 | url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/22/34.yaml 12 | original: lts-22.34 13 | -------------------------------------------------------------------------------- /test/Data/LargeHashable/Tests/Class.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# OPTIONS_GHC -F -pgmF htfpp #-} 3 | module Data.LargeHashable.Tests.Class where 4 | 5 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines") 6 | import Data.Fixed 7 | import Data.Hashable 8 | import Data.Int 9 | import Data.LargeHashable 10 | import Data.LargeHashable.Tests.Helper 11 | import Data.Map (Map ()) 12 | import Data.Ratio 13 | import Data.Set (Set ()) 14 | import Data.Time.Calendar 15 | import Data.Time.Clock 16 | import Data.Time.Clock.TAI 17 | import Data.Time.LocalTime 18 | import Data.Word 19 | import Test.Framework hiding (Fixed (..)) 20 | import qualified Data.Aeson as J 21 | import qualified Data.ByteString as B 22 | import qualified Data.ByteString.Lazy as BL 23 | import qualified Data.ByteString.Short as BS 24 | import qualified Data.HashMap.Lazy as HML 25 | import qualified Data.HashMap.Strict as HM 26 | import qualified Data.HashSet as HS 27 | import qualified Data.IntMap as IM 28 | import qualified Data.IntSet as IS 29 | import qualified Data.Map as Map 30 | import qualified Data.Scientific as Sci 31 | import qualified Data.Sequence as Seq 32 | import qualified Data.Set as Set 33 | import qualified Data.Strict.Tuple as Tuple 34 | import qualified Data.Text as T 35 | import qualified Data.Text.Lazy as TL 36 | import qualified Data.Vector as V 37 | 38 | -- of course we can't fully prove uniqueness using 39 | -- properties and there is a small chance of collisions 40 | generic_uniquenessProp :: (Eq a, LargeHashable a) => a -> a -> Bool 41 | generic_uniquenessProp a b = (a == b) == (largeHash md5HashAlgorithm a == largeHash md5HashAlgorithm b) 42 | 43 | prop_integerUniqueness :: Integer -> Integer -> Bool 44 | prop_integerUniqueness = generic_uniquenessProp 45 | 46 | test_hashInteger :: IO () 47 | test_hashInteger = 48 | do subAssert $ assertHashes 0 49 | subAssert $ assertHashes (toInteger (maxBound::Int)) 50 | subAssert $ assertHashes (toInteger (minBound::Int)) 51 | where 52 | assertHashes :: Integer -> IO () 53 | assertHashes n = 54 | do let list = [(n-100)..(n+100)] 55 | hashes = map (largeHash md5HashAlgorithm) list 56 | hashesSet = Set.fromList hashes 57 | assertEqual (length list) (Set.size hashesSet) 58 | 59 | test_hashBool :: IO () 60 | test_hashBool = 61 | assertBool (largeHash md5HashAlgorithm True /= largeHash md5HashAlgorithm False) 62 | 63 | prop_genericUniqueness :: TestA -> TestA -> Bool 64 | prop_genericUniqueness = generic_uniquenessProp 65 | 66 | prop_intUniqueness :: Int -> Int -> Bool 67 | prop_intUniqueness = generic_uniquenessProp 68 | 69 | prop_int8Uniqueness :: Int8 -> Int8 -> Bool 70 | prop_int8Uniqueness = generic_uniquenessProp 71 | 72 | prop_int16Uniqueness :: Int16 -> Int16 -> Bool 73 | prop_int16Uniqueness = generic_uniquenessProp 74 | 75 | prop_int32Uniqueness :: Int32 -> Int32 -> Bool 76 | prop_int32Uniqueness = generic_uniquenessProp 77 | 78 | prop_int64Uniqueness :: Int64 -> Int64 -> Bool 79 | prop_int64Uniqueness = generic_uniquenessProp 80 | 81 | prop_wordUniqueness :: Word -> Word -> Bool 82 | prop_wordUniqueness = generic_uniquenessProp 83 | 84 | prop_word8Uniqueness :: Word8 -> Word8 -> Bool 85 | prop_word8Uniqueness = generic_uniquenessProp 86 | 87 | prop_word16Uniqueness :: Word16 -> Word16 -> Bool 88 | prop_word16Uniqueness = generic_uniquenessProp 89 | 90 | prop_word32Uniqueness :: Word32 -> Word32 -> Bool 91 | prop_word32Uniqueness = generic_uniquenessProp 92 | 93 | prop_word64Uniqueness :: Word64 -> Word64 -> Bool 94 | prop_word64Uniqueness = generic_uniquenessProp 95 | 96 | prop_charUniqueness :: Char -> Char -> Bool 97 | prop_charUniqueness = generic_uniquenessProp 98 | 99 | prop_bytestringUniqueness :: B.ByteString -> B.ByteString -> Bool 100 | prop_bytestringUniqueness = generic_uniquenessProp 101 | 102 | prop_appendByteStringOk :: B.ByteString -> B.ByteString -> Bool 103 | prop_appendByteStringOk b1 b2 = 104 | runMD5 (updateHash (b1 `B.append` b2)) /= 105 | runMD5 (updateHash b1 >> updateHash b2) 106 | 107 | test_irrelevantByteStringChunking :: IO () 108 | test_irrelevantByteStringChunking = do 109 | assertEqual (largeHash md5HashAlgorithm (BL.fromChunks ["foo", "ba", "r"])) 110 | (largeHash md5HashAlgorithm (BL.fromChunks ["foob", "ar"])) 111 | 112 | prop_lazyBytestringUniqueness :: BL.ByteString -> BL.ByteString -> Bool 113 | prop_lazyBytestringUniqueness = generic_uniquenessProp 114 | 115 | prop_appendLazyByteStringOk :: BL.ByteString -> BL.ByteString -> Bool 116 | prop_appendLazyByteStringOk b1 b2 = 117 | runMD5 (updateHash (b1 `BL.append` b2)) /= 118 | runMD5 (updateHash b1 >> updateHash b2) 119 | 120 | prop_shortBytestringUniqueness :: BS.ShortByteString -> BS.ShortByteString -> Bool 121 | prop_shortBytestringUniqueness = generic_uniquenessProp 122 | 123 | prop_textUniqueness :: T.Text -> T.Text -> Bool 124 | prop_textUniqueness = generic_uniquenessProp 125 | 126 | prop_appendTextOk :: T.Text -> T.Text -> Bool 127 | prop_appendTextOk t1 t2 = 128 | runMD5 (updateHash (t1 `T.append` t2)) /= 129 | runMD5 (updateHash t1 >> updateHash t2) 130 | 131 | test_irrelevantTextChunking :: IO () 132 | test_irrelevantTextChunking = do 133 | assertEqual (largeHash md5HashAlgorithm (TL.fromChunks ["don't", " pa", "nic"])) 134 | (largeHash md5HashAlgorithm (TL.fromChunks ["don", "'t p", "an", "ic"])) 135 | 136 | prop_lazyTextUniqueness :: TL.Text -> TL.Text -> Bool 137 | prop_lazyTextUniqueness = generic_uniquenessProp 138 | 139 | prop_appendLazyTextOk :: TL.Text -> TL.Text -> Bool 140 | prop_appendLazyTextOk t1 t2 = 141 | runMD5 (updateHash (t1 `TL.append` t2)) /= 142 | runMD5 (updateHash t1 >> updateHash t2) 143 | 144 | prop_lazyStrictTextSame :: [T.Text] -> Bool 145 | prop_lazyStrictTextSame chunks = 146 | let lazy = TL.fromChunks chunks 147 | strict = mconcat chunks 148 | in runMD5 (updateHash lazy) == runMD5 (updateHash strict) 149 | 150 | prop_listUniqueness :: [Bool] -> [Bool] -> Bool 151 | prop_listUniqueness = generic_uniquenessProp 152 | 153 | prop_appendListOk :: [Int] -> [Int] -> Bool 154 | prop_appendListOk l1 l2 = 155 | runMD5 (updateHash (l1 ++ l2)) /= 156 | runMD5 (updateHash l1 >> updateHash l2) 157 | 158 | prop_DoubleUniqueness :: Double -> Double -> Bool 159 | prop_DoubleUniqueness = generic_uniquenessProp 160 | 161 | prop_FloatUniqueness :: Float -> Float -> Bool 162 | prop_FloatUniqueness = generic_uniquenessProp 163 | 164 | prop_setUniqueness :: Set Int -> Set Int -> Bool 165 | prop_setUniqueness = generic_uniquenessProp 166 | 167 | test_hashSet :: IO () 168 | test_hashSet = 169 | do let s1 = Set.fromList [1::Int, 2, 3] 170 | s2 = Set.fromList [3::Int, 2, 1] 171 | s3 = Set.fromList [2::Int, 1, 3] 172 | assertEqual (largeHash md5HashAlgorithm s1) (largeHash md5HashAlgorithm s2) 173 | assertEqual (largeHash md5HashAlgorithm s1) (largeHash md5HashAlgorithm s3) 174 | assertEqual (largeHash md5HashAlgorithm s2) (largeHash md5HashAlgorithm s3) 175 | 176 | prop_unionSetOk :: Set Int -> Set Int -> Bool 177 | prop_unionSetOk s1 s2 = 178 | runMD5 (updateHash (s1 `Set.union` s2)) /= runMD5 (updateHash s1 >> updateHash s2) 179 | 180 | prop_intSetUniqueness :: IS.IntSet -> IS.IntSet -> Bool 181 | prop_intSetUniqueness = generic_uniquenessProp 182 | 183 | test_hashIntSet :: IO () 184 | test_hashIntSet = 185 | do let s1 = IS.fromList [1::Int, 2, 3] 186 | s2 = IS.fromList [3::Int, 2, 1] 187 | s3 = IS.fromList [2::Int, 1, 3] 188 | assertEqual (largeHash md5HashAlgorithm s1) (largeHash md5HashAlgorithm s2) 189 | assertEqual (largeHash md5HashAlgorithm s1) (largeHash md5HashAlgorithm s3) 190 | assertEqual (largeHash md5HashAlgorithm s2) (largeHash md5HashAlgorithm s3) 191 | 192 | prop_unionIntSetOk :: IS.IntSet -> IS.IntSet -> Bool 193 | prop_unionIntSetOk s1 s2 = 194 | runMD5 (updateHash (s1 `IS.union` s2)) /= 195 | runMD5 (updateHash s1 >> updateHash s2) 196 | 197 | prop_hashSetUniqueness :: HS.HashSet Int -> HS.HashSet Int -> Bool 198 | prop_hashSetUniqueness = generic_uniquenessProp 199 | 200 | data ConstHash 201 | = ConstHash Int 202 | deriving (Eq) 203 | 204 | instance Hashable ConstHash where 205 | hashWithSalt _ _ = 0 206 | 207 | instance LargeHashable ConstHash where 208 | updateHash (ConstHash i) = updateHash i 209 | updateHashStable (ConstHash i) = updateHashStable i 210 | 211 | test_hashHashSet :: IO () 212 | test_hashHashSet = 213 | do let s1 = HS.fromList [ConstHash 1, ConstHash 2, ConstHash 3] 214 | s2 = HS.fromList [ConstHash 3, ConstHash 2, ConstHash 1] 215 | s3 = HS.fromList [ConstHash 2, ConstHash 1, ConstHash 3] 216 | assertEqual (largeHash md5HashAlgorithm s1) (largeHash md5HashAlgorithm s2) 217 | assertEqual (largeHash md5HashAlgorithm s1) (largeHash md5HashAlgorithm s3) 218 | assertEqual (largeHash md5HashAlgorithm s2) (largeHash md5HashAlgorithm s3) 219 | 220 | prop_unionHashSetOk :: HS.HashSet Int -> HS.HashSet Int -> Bool 221 | prop_unionHashSetOk s1 s2 = 222 | runMD5 (updateHash (s1 `HS.union` s2)) /= 223 | runMD5 (updateHash s1 >> updateHash s2) 224 | 225 | prop_mapUniqueness :: Map Int String -> Map Int String -> Bool 226 | prop_mapUniqueness = generic_uniquenessProp 227 | 228 | test_hashMap :: IO () 229 | test_hashMap = 230 | do let m1 = Map.fromList [(1::Int, "1"::String), (2, "2"), (3, "3")] 231 | m2 = Set.fromList [(3::Int, "3"::String), (2, "2"), (1, "1")] 232 | m3 = Set.fromList [(2::Int, "2"::String), (1, "1"), (3, "3")] 233 | assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m2) 234 | assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m3) 235 | assertEqual (largeHash md5HashAlgorithm m2) (largeHash md5HashAlgorithm m3) 236 | 237 | prop_unionMapOk :: Map Int Bool -> Map Int Bool -> Bool 238 | prop_unionMapOk m1 m2 = 239 | runMD5 (updateHash (m1 `Map.union` m2)) /= runMD5 (updateHash m1 >> updateHash m2) 240 | 241 | prop_intMapUniqueness :: IM.IntMap String -> IM.IntMap String -> Bool 242 | prop_intMapUniqueness = generic_uniquenessProp 243 | 244 | test_hashIntMap :: IO () 245 | test_hashIntMap = 246 | do let m1 = IM.fromList [(1::Int, "1"::String), (2, "2"), (3, "3")] 247 | m2 = IM.fromList [(3::Int, "3"::String), (2, "2"), (1, "1")] 248 | m3 = IM.fromList [(2::Int, "2"::String), (1, "1"), (3, "3")] 249 | assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m2) 250 | assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m3) 251 | assertEqual (largeHash md5HashAlgorithm m2) (largeHash md5HashAlgorithm m3) 252 | 253 | prop_unionIntMapOk :: IM.IntMap Bool -> IM.IntMap Bool -> Bool 254 | prop_unionIntMapOk m1 m2 = 255 | runMD5 (updateHash (m1 `IM.union` m2)) /= runMD5 (updateHash m1 >> updateHash m2) 256 | 257 | prop_hashMapUniqueness :: HM.HashMap Int Bool -> HM.HashMap Int Bool -> Bool 258 | prop_hashMapUniqueness = generic_uniquenessProp 259 | 260 | test_hashHashMap :: IO () 261 | test_hashHashMap = 262 | do let val = "foo" :: String 263 | m1 = HM.fromList [(ConstHash 1, val), (ConstHash 2, val), (ConstHash 3, val)] 264 | m2 = HM.fromList [(ConstHash 3, val), (ConstHash 2, val), (ConstHash 1, val)] 265 | m3 = HM.fromList [(ConstHash 2, val), (ConstHash 1, val), (ConstHash 3, val)] 266 | assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m2) 267 | assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m3) 268 | assertEqual (largeHash md5HashAlgorithm m2) (largeHash md5HashAlgorithm m3) 269 | 270 | prop_unionHashMapOk :: HM.HashMap Int Bool -> HM.HashMap Int Bool -> Bool 271 | prop_unionHashMapOk m1 m2 = 272 | runMD5 (updateHash (m1 `HM.union` m2)) /= runMD5 (updateHash m1 >> updateHash m2) 273 | 274 | prop_hashMapLazyUniqueness :: HML.HashMap Int Bool -> HML.HashMap Int Bool -> Bool 275 | prop_hashMapLazyUniqueness = generic_uniquenessProp 276 | 277 | test_hashHashMapLazy :: IO () 278 | test_hashHashMapLazy = 279 | do let val = "foo" :: String 280 | m1 = HML.fromList [(ConstHash 1, val), (ConstHash 2, val), (ConstHash 3, val)] 281 | m2 = HML.fromList [(ConstHash 3, val), (ConstHash 2, val), (ConstHash 1, val)] 282 | m3 = HML.fromList [(ConstHash 2, val), (ConstHash 1, val), (ConstHash 3, val)] 283 | assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m2) 284 | assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m3) 285 | assertEqual (largeHash md5HashAlgorithm m2) (largeHash md5HashAlgorithm m3) 286 | 287 | prop_hashPairUniqueness :: (Int, Int) -> (Int, Int) -> Bool 288 | prop_hashPairUniqueness = generic_uniquenessProp 289 | 290 | prop_hashTripleUniqueness :: (Int, Int, Int) -> (Int, Int, Int) -> Bool 291 | prop_hashTripleUniqueness = generic_uniquenessProp 292 | 293 | prop_hashQuadrupleUniqueness :: (Int, Int, Int, Int) -> (Int, Int, Int, Int) -> Bool 294 | prop_hashQuadrupleUniqueness = generic_uniquenessProp 295 | 296 | prop_hashQuintupleUniqueness :: (Int, Int, Int, Int, Int) -> (Int, Int, Int, Int, Int) -> Bool 297 | prop_hashQuintupleUniqueness = generic_uniquenessProp 298 | 299 | prop_hashStrictPairUniqueness :: Tuple.Pair Int Int -> Tuple.Pair Int Int -> Bool 300 | prop_hashStrictPairUniqueness = generic_uniquenessProp 301 | 302 | prop_hashMaybeUniqueness :: Maybe Int -> Maybe Int -> Bool 303 | prop_hashMaybeUniqueness = generic_uniquenessProp 304 | 305 | prop_hashEitherUniqueness :: Either Int Bool -> Either Int Bool -> Bool 306 | prop_hashEitherUniqueness = generic_uniquenessProp 307 | 308 | prop_hashOrderingUniqueness :: Ordering -> Ordering -> Bool 309 | prop_hashOrderingUniqueness = generic_uniquenessProp 310 | 311 | prop_ratioUniqueness :: Ratio Int -> Ratio Int -> Bool 312 | prop_ratioUniqueness = generic_uniquenessProp 313 | 314 | test_ratio :: IO () 315 | test_ratio = 316 | do let r1 = (1::Int) % 2 317 | r2 = (2::Int) % 4 318 | assertEqual (largeHash md5HashAlgorithm r1) (largeHash md5HashAlgorithm r2) 319 | 320 | prop_dayUniqueness :: Day -> Day -> Bool 321 | prop_dayUniqueness = generic_uniquenessProp 322 | 323 | prop_diffTimeUniqueness :: DiffTime -> DiffTime -> Bool 324 | prop_diffTimeUniqueness = generic_uniquenessProp 325 | 326 | prop_utcTimeUniqueness :: UTCTime -> UTCTime -> Bool 327 | prop_utcTimeUniqueness = generic_uniquenessProp 328 | 329 | prop_absoluteTimeUniqueness :: AbsoluteTime -> AbsoluteTime -> Bool 330 | prop_absoluteTimeUniqueness = generic_uniquenessProp 331 | 332 | prop_nominalDiffTimeUniqueness :: NominalDiffTime -> NominalDiffTime -> Bool 333 | prop_nominalDiffTimeUniqueness = generic_uniquenessProp 334 | 335 | prop_timeZoneUniqueness :: TimeZone -> TimeZone -> Bool 336 | prop_timeZoneUniqueness = generic_uniquenessProp 337 | 338 | prop_timeOfDayUniqueness :: TimeOfDay -> TimeOfDay -> Bool 339 | prop_timeOfDayUniqueness = generic_uniquenessProp 340 | 341 | prop_localTimeUniqueness :: LocalTime -> LocalTime -> Bool 342 | prop_localTimeUniqueness = generic_uniquenessProp 343 | 344 | prop_fixedUniqueness :: Fixed E1 -> Fixed E1 -> Bool 345 | prop_fixedUniqueness = generic_uniquenessProp 346 | 347 | prop_scientificUniqueness :: Sci.Scientific -> Sci.Scientific -> Bool 348 | prop_scientificUniqueness = generic_uniquenessProp 349 | 350 | prop_aesonValueUniqueness :: J.Value -> J.Value -> Bool 351 | prop_aesonValueUniqueness = generic_uniquenessProp 352 | 353 | prop_vectorUniqueness :: V.Vector Int -> V.Vector Int -> Bool 354 | prop_vectorUniqueness = generic_uniquenessProp 355 | 356 | prop_appendVectorOk :: V.Vector Int -> V.Vector Int -> Bool 357 | prop_appendVectorOk v1 v2 = 358 | runMD5 (updateHash (v1 V.++ v2)) /= 359 | runMD5 (updateHash v1 >> updateHash v2) 360 | 361 | prop_seqUniqueness :: Seq.Seq Int -> Seq.Seq Int -> Bool 362 | prop_seqUniqueness = generic_uniquenessProp 363 | 364 | prop_appendSeqOk :: Seq.Seq Int -> Seq.Seq Int -> Bool 365 | prop_appendSeqOk s1 s2 = 366 | runMD5 (updateHash (s1 Seq.>< s2)) /= 367 | runMD5 (updateHash s1 >> updateHash s2) 368 | 369 | -- regression test for #25 370 | test_textHash :: IO () 371 | test_textHash = do 372 | let t1 = T.pack "abcdefgh" 373 | t2 = T.pack "abcdxxxx" 374 | assertEqual 375 | (largeHash md5HashAlgorithm (T.take 4 t1)) 376 | (largeHash md5HashAlgorithm (T.take 4 t2)) 377 | assertNotEqual 378 | (largeHash md5HashAlgorithm (T.take 5 t1)) 379 | (largeHash md5HashAlgorithm (T.take 5 t2)) 380 | -------------------------------------------------------------------------------- /test/Data/LargeHashable/Tests/Helper.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -fno-warn-orphans #-} 2 | {-# LANGUAGE CPP #-} 3 | {-# LANGUAGE DeriveGeneric #-} 4 | {-# LANGUAGE OverloadedStrings #-} 5 | module Data.LargeHashable.Tests.Helper where 6 | 7 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines") 8 | import Control.Monad 9 | import Data.Bytes.Serial 10 | import Data.Hashable 11 | import Data.LargeHashable 12 | import Data.Time.Calendar 13 | import Data.Time.Clock 14 | import Data.Time.Clock.TAI 15 | import Data.Time.LocalTime 16 | import GHC.Generics 17 | import Test.QuickCheck 18 | import qualified Data.ByteString as B 19 | import qualified Data.ByteString.Lazy as BL 20 | import qualified Data.ByteString.Short as BS 21 | import qualified Data.HashMap.Lazy as HML 22 | import qualified Data.HashSet as HS 23 | import qualified Data.Scientific as Sci 24 | import qualified Data.Strict.Tuple as Tuple 25 | import qualified Data.Text as T 26 | import qualified Data.Text.Lazy as TL 27 | import qualified Data.Vector as V 28 | 29 | #if !MIN_VERSION_QuickCheck(2,8,2) 30 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines") 31 | import qualified Data.IntMap as IM 32 | import qualified Data.IntSet as IS 33 | import qualified Data.Map as M 34 | import qualified Data.Set as S 35 | #endif 36 | 37 | instance Arbitrary T.Text where 38 | arbitrary = fmap T.pack arbitrary 39 | shrink = map T.pack . shrink . T.unpack 40 | 41 | instance Arbitrary B.ByteString where 42 | arbitrary = fmap B.pack arbitrary 43 | shrink = map B.pack . shrink . B.unpack 44 | 45 | instance Arbitrary BL.ByteString where 46 | arbitrary = fmap BL.fromChunks arbitrary 47 | shrink = map BL.fromChunks . shrink . BL.toChunks 48 | 49 | instance Arbitrary TL.Text where 50 | arbitrary = fmap TL.fromChunks arbitrary 51 | shrink = map TL.fromChunks . shrink . TL.toChunks 52 | 53 | instance Arbitrary a => Arbitrary (V.Vector a) where 54 | arbitrary = liftM V.fromList arbitrary 55 | shrink = map V.fromList . shrink . V.toList 56 | 57 | instance Arbitrary Sci.Scientific where 58 | arbitrary = 59 | liftM2 Sci.scientific arbitrary arbitrary 60 | shrink = shrinkRealFrac 61 | 62 | #if !MIN_VERSION_aeson(2,0,0) 63 | instance Arbitrary J.Value where 64 | arbitrary = sized arbitraryJsonValue 65 | where 66 | arbitraryJsonValue n = 67 | if n <= 0 68 | then oneof simpleGens 69 | else frequency $ 70 | (map (\g -> (1, g)) simpleGens) ++ 71 | [(3, arbitraryObject (n `div` 2)) 72 | ,(3, arbitraryArray (n `div` 2))] 73 | arbitraryObject n = 74 | do l <- arbitraryListOfJsonValues n 75 | elems <- forM l $ \v -> 76 | do k <- 77 | elements 78 | ["key", "foo", "bar", "baz", "spam", "egg", "chicken", "dog" 79 | ,"any", "what", "santa", "mark", "wardrobe", "baseball"] 80 | return (k, v) 81 | return (J.Object (HM.fromList elems)) 82 | arbitraryArray n = 83 | do l <- arbitraryListOfJsonValues n 84 | return (J.Array (V.fromList l)) 85 | arbitraryListOfJsonValues n = 86 | do size <- elements [(0::Int)..10] 87 | forM [1..(size::Int)] $ \_ -> arbitraryJsonValue n 88 | simpleGens = 89 | [liftM J.String arbitrary 90 | ,liftM J.Number arbitrary 91 | ,liftM J.Bool arbitrary 92 | ,return J.Null] 93 | shrink v = 94 | case v of 95 | J.Object obj -> 96 | map J.Object (shrink obj) 97 | J.Array arr -> 98 | map J.Array (shrink arr) 99 | J.String t -> 100 | map J.String (shrink t) 101 | J.Number n -> 102 | map J.Number (shrink n) 103 | J.Bool _ -> [] 104 | J.Null -> [] 105 | #endif 106 | 107 | instance (Eq k, Hashable k, Arbitrary k, Arbitrary a) => Arbitrary (HML.HashMap k a) where 108 | arbitrary = fmap HML.fromList arbitrary 109 | shrink = map HML.fromList . shrink . HML.toList 110 | 111 | instance (Eq a, Hashable a, Arbitrary a) => Arbitrary (HS.HashSet a) where 112 | arbitrary = fmap HS.fromList arbitrary 113 | shrink = map HS.fromList . shrink . HS.toList 114 | 115 | #if !MIN_VERSION_QuickCheck(2,8,2) 116 | instance (Ord k, Arbitrary k, Arbitrary a) => Arbitrary (M.Map k a) where 117 | arbitrary = fmap M.fromList arbitrary 118 | shrink = map M.fromList . shrink . M.toList 119 | 120 | instance Arbitrary a => Arbitrary (IM.IntMap a) where 121 | arbitrary = fmap IM.fromList arbitrary 122 | shrink = map IM.fromList . shrink . IM.toList 123 | 124 | instance (Ord a, Arbitrary a) => Arbitrary (S.Set a) where 125 | arbitrary = fmap S.fromList arbitrary 126 | shrink = map S.fromList . shrink . S.toList 127 | 128 | instance Arbitrary IS.IntSet where 129 | arbitrary = fmap IS.fromList arbitrary 130 | shrink = map IS.fromList . shrink . IS.toList 131 | 132 | #endif 133 | 134 | data TestA 135 | = TestA 136 | { age :: Int 137 | , wealth :: Integer 138 | , isStudent :: Bool 139 | , name :: T.Text 140 | , initials :: [Char] 141 | , legLength :: (Int, Int) 142 | } deriving (Generic, Eq, Show) 143 | 144 | instance LargeHashable TestA 145 | instance Serial TestA 146 | 147 | instance Arbitrary TestA where 148 | arbitrary = 149 | TestA <$> arbitrary <*> arbitrary <*> arbitrary <*> 150 | arbitrary <*> arbitrary <*> arbitrary 151 | 152 | instance Arbitrary BS.ShortByteString where 153 | arbitrary = liftM BS.pack arbitrary 154 | shrink x = 155 | map BS.pack (shrink (BS.unpack x)) 156 | 157 | instance Arbitrary Day where 158 | arbitrary = 159 | do n <- arbitrary 160 | return (ModifiedJulianDay (n `mod` 1000)) 161 | shrink (ModifiedJulianDay n) = 162 | map ModifiedJulianDay (shrink n) 163 | 164 | instance Arbitrary DiffTime where 165 | arbitrary = 166 | liftM picosecondsToDiffTime arbitrary 167 | shrink delta = 168 | let n = toRational delta 169 | in map fromRational (shrink n) 170 | 171 | instance Arbitrary UTCTime where 172 | arbitrary = 173 | do day <- arbitrary 174 | picos <- arbitrary 175 | return (UTCTime day (picosecondsToDiffTime (picos `mod` 86401))) 176 | 177 | instance Arbitrary AbsoluteTime where 178 | arbitrary = 179 | do diff <- arbitrary 180 | return (addAbsoluteTime diff taiEpoch) 181 | 182 | instance Arbitrary NominalDiffTime where 183 | arbitrary = 184 | do r <- arbitrary 185 | return (fromRational r) 186 | 187 | instance Arbitrary TimeZone where 188 | arbitrary = 189 | do n <- arbitrary 190 | s <- arbitrary 191 | b <- arbitrary 192 | return (TimeZone (n `mod` (60*24)) b s) 193 | 194 | instance Arbitrary TimeOfDay where 195 | arbitrary = 196 | do h <- arbitrary 197 | m <- arbitrary 198 | s <- arbitrary 199 | return (TimeOfDay (h `mod` 24) (m `mod` 60) (fromInteger (s `mod` 61))) 200 | 201 | instance Arbitrary LocalTime where 202 | arbitrary = 203 | do d <- arbitrary 204 | t <- arbitrary 205 | return (LocalTime d t) 206 | 207 | instance Arbitrary ZonedTime where 208 | arbitrary = 209 | do t <- arbitrary 210 | z <- arbitrary 211 | return (ZonedTime t z) 212 | 213 | instance Arbitrary UniversalTime where 214 | arbitrary = 215 | do r <- arbitrary 216 | return (ModJulianDate r) 217 | 218 | instance (Arbitrary a, Arbitrary b) => Arbitrary (Tuple.Pair a b) where 219 | arbitrary = 220 | do x <- arbitrary 221 | y <- arbitrary 222 | return (x Tuple.:!: y) 223 | 224 | instance Arbitrary Word128 where 225 | arbitrary = Word128 <$> arbitrary <*> arbitrary 226 | 227 | instance Arbitrary Word256 where 228 | arbitrary = Word256 <$> arbitrary <*> arbitrary 229 | -------------------------------------------------------------------------------- /test/Data/LargeHashable/Tests/Inspection.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TemplateHaskell #-} 2 | {-# LANGUAGE DeriveGeneric #-} 3 | {-# OPTIONS_GHC -F -pgmF htfpp #-} 4 | {-# OPTIONS_GHC -O -fplugin Test.Inspection.Plugin #-} 5 | module Data.LargeHashable.Tests.Inspection where 6 | 7 | import Test.Framework hiding ((===), Failure, Success) 8 | import Test.Inspection 9 | 10 | import Data.LargeHashable 11 | import Data.LargeHashable.Tests.Helper 12 | import Data.LargeHashable.Class 13 | import GHC.Generics 14 | 15 | genericUpdateHashTestA :: TestA -> LH () 16 | genericUpdateHashTestA = genericUpdateHash 17 | 18 | test_genericProductGetsOptimized :: IO () 19 | test_genericProductGetsOptimized = 20 | case $(inspectTest (hasNoGenerics 'genericUpdateHashTestA)) of 21 | Success _ -> return () 22 | Failure e -> assertFailure e 23 | 24 | data SumTest 25 | = A Int 26 | | B Char 27 | | C Integer 28 | | D (Either Int Char) 29 | deriving (Generic) 30 | 31 | $(deriveLargeHashable ''SumTest) 32 | 33 | genericUpdateHashSum :: SumTest -> LH () 34 | genericUpdateHashSum = genericUpdateHash 35 | 36 | thUpdateHashSum :: SumTest -> LH () 37 | thUpdateHashSum = updateHash 38 | 39 | test_genericSumGetsOptimized :: IO () 40 | test_genericSumGetsOptimized = 41 | unitTestPending' "This currently doesn't hold" $ 42 | case $(inspectTest (hasNoGenerics 'genericUpdateHashSum)) of 43 | Success _ -> return () 44 | Failure e -> assertFailure e 45 | 46 | test_genericSumEqTH :: IO () 47 | test_genericSumEqTH = 48 | unitTestPending' "This currently doesn't hold" $ 49 | case $(inspectTest ('genericUpdateHashSum === 'thUpdateHashSum)) of 50 | Success _ -> return () 51 | Failure e -> assertFailure e 52 | 53 | data SopTest 54 | = A2 Char Int Bool 55 | | B2 Int Bool 56 | | C2 (Char, Int) (Maybe Char) 57 | | D2 (Either Int Char) (Char, Int, Integer) 58 | deriving (Generic) 59 | 60 | $(deriveLargeHashable ''SopTest) 61 | 62 | genericUpdateHashSop :: SopTest -> LH () 63 | genericUpdateHashSop = genericUpdateHash 64 | 65 | thUpdateHashSop :: SopTest -> LH () 66 | thUpdateHashSop = updateHash 67 | 68 | test_genericSumOfProductsGetsOptimized :: IO () 69 | test_genericSumOfProductsGetsOptimized = 70 | unitTestPending' "This currently doesn't hold" $ 71 | case $(inspectTest (hasNoGenerics 'genericUpdateHashSop)) of 72 | Success _ -> return () 73 | Failure e -> assertFailure e 74 | 75 | test_genericSopEqTH :: IO () 76 | test_genericSopEqTH = 77 | unitTestPending' "This currently doesn't hold" $ 78 | case $(inspectTest ('genericUpdateHashSop === 'thUpdateHashSop)) of 79 | Success _ -> return () 80 | Failure e -> assertFailure e 81 | 82 | data UnitTest = UnitTest deriving (Generic) 83 | 84 | $(deriveLargeHashable ''UnitTest) 85 | 86 | genericUpdateHashUnitType :: UnitTest -> LH () 87 | genericUpdateHashUnitType = genericUpdateHash 88 | 89 | thUpdateHashUnitType :: UnitTest -> LH () 90 | thUpdateHashUnitType = updateHash 91 | 92 | unitTypeReturn :: UnitTest -> LH () 93 | unitTypeReturn UnitTest = return () 94 | 95 | test_genericUnitHashIsNoop :: IO () 96 | test_genericUnitHashIsNoop = 97 | case $(inspectTest ('genericUpdateHashUnitType === 'unitTypeReturn)) of 98 | Success _ -> return () 99 | Failure e -> assertFailure e 100 | 101 | test_thUnitHashIsNoop :: IO () 102 | test_thUnitHashIsNoop = 103 | case $(inspectTest ('thUpdateHashUnitType === 'unitTypeReturn)) of 104 | Success _ -> return () 105 | Failure e -> assertFailure e 106 | 107 | updateHashHaskellUnit :: () -> LH () 108 | updateHashHaskellUnit () = 109 | -- I have to do this twice for inlining to work 110 | do updateHash () 111 | updateHash () 112 | 113 | haskellUnitReturn :: () -> LH () 114 | haskellUnitReturn () = return () 115 | 116 | test_haskellUnitHashIsNoop :: IO () 117 | test_haskellUnitHashIsNoop = 118 | case $(inspectTest ('updateHashHaskellUnit === 'haskellUnitReturn)) of 119 | Success _ -> return () 120 | Failure e -> assertFailure e 121 | -------------------------------------------------------------------------------- /test/Data/LargeHashable/Tests/LargeWord.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -F -pgmF htfpp #-} 2 | module Data.LargeHashable.Tests.LargeWord where 3 | 4 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines") 5 | import Data.LargeHashable.LargeWord 6 | import Data.LargeHashable.Tests.Helper () 7 | import Test.Framework 8 | import qualified Data.ByteString as BS 9 | 10 | prop_w128BsHasLength16 :: Word128 -> Bool 11 | prop_w128BsHasLength16 x = BS.length (w128ToBs x) == 16 12 | 13 | prop_conversionFromAndToWord128 :: Word128 -> Bool 14 | prop_conversionFromAndToWord128 h128 = h128 == bsToW128 (w128ToBs h128) 15 | 16 | prop_conversionFromAndToWord256 :: Word256 -> Bool 17 | prop_conversionFromAndToWord256 h256 = h256 == bsToW256 (w256ToBs h256) 18 | 19 | test_bsToW128 :: IO () 20 | test_bsToW128 = 21 | do assertEqual (Word128 0 0) (bsToW128 BS.empty) 22 | assertEqual (bsToW128 (BS.pack ([0,0,0,0,0,0,0,1] ++ replicate 8 0))) (bsToW128 (BS.pack [1])) 23 | assertEqual (bsToW128 (BS.pack ([1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,9]))) 24 | (bsToW128 (BS.pack [1,2,3,4,5,6,7,8,9])) 25 | -------------------------------------------------------------------------------- /test/Data/LargeHashable/Tests/MD5.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -F -pgmF htfpp #-} 2 | module Data.LargeHashable.Tests.MD5 where 3 | 4 | import Data.LargeHashable.LargeWord 5 | import Data.LargeHashable.MD5 6 | import Data.LargeHashable.Tests.Helper () 7 | import Test.Framework 8 | 9 | prop_readShowMD5Hash :: Word128 -> Bool 10 | prop_readShowMD5Hash w = 11 | let h = MD5Hash w 12 | in h == read (show h) 13 | -------------------------------------------------------------------------------- /test/Data/LargeHashable/Tests/Stable.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# OPTIONS_GHC -F -pgmF htfpp #-} 3 | module Data.LargeHashable.Tests.Stable where 4 | 5 | import Data.LargeHashable 6 | import qualified Data.Text as T 7 | import qualified Data.Text.Lazy as TL 8 | import Test.Framework 9 | 10 | test_strictTextBackwardsCompat :: IO () 11 | test_strictTextBackwardsCompat = do 12 | -- we check that largeHashStable returns the same hash as for text version 1.* 13 | assertEqual (read "cc1ea58b3c2ec61d19c4ce222bc9e55d") 14 | (largeHashStable md5HashAlgorithm (T.pack "Stefan")) 15 | 16 | test_lazyTextBackwardsCompat :: IO () 17 | test_lazyTextBackwardsCompat = do 18 | -- we check that largeHashStable returns the same hash as for text version 1.* 19 | assertEqual (read "cc1ea58b3c2ec61d19c4ce222bc9e55d") 20 | (largeHashStable md5HashAlgorithm (TL.pack "Stefan")) 21 | 22 | test_lazyTextStableChunking :: IO () 23 | test_lazyTextStableChunking = do 24 | assertEqual (largeHashStable md5HashAlgorithm (TL.fromChunks ["foo", "ba", "r"])) 25 | (largeHashStable md5HashAlgorithm (TL.fromChunks ["foob", "ar"])) 26 | -------------------------------------------------------------------------------- /test/Data/LargeHashable/Tests/TH.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE GADTs #-} 2 | {-# LANGUAGE TemplateHaskell #-} 3 | {-# OPTIONS_GHC -F -pgmF htfpp #-} 4 | {-# OPTIONS_GHC -ddump-splices #-} 5 | module Data.LargeHashable.Tests.TH where 6 | 7 | import Test.Framework 8 | 9 | import Data.LargeHashable 10 | 11 | -- | Simple test data structure that embodies most 12 | -- of the diferrent features of a type 13 | -- the TH deriver can encounter. 14 | data BlaFoo a 15 | = Foo 16 | | Bar Int a 17 | | Baz a a a 18 | deriving (Eq, Show) 19 | 20 | $(deriveLargeHashable ''BlaFoo) 21 | 22 | instance Arbitrary a => Arbitrary (BlaFoo a) where 23 | arbitrary = oneof [ pure Foo 24 | , Bar <$> arbitrary <*> arbitrary 25 | , Baz <$> arbitrary <*> arbitrary <*> arbitrary 26 | ] 27 | 28 | -- | Simple property that tries to find hash collisions. 29 | prop_thDerivedHashUnique :: BlaFoo Char -> BlaFoo Char -> Bool 30 | prop_thDerivedHashUnique x y = (x == y) == (largeHash md5HashAlgorithm x == largeHash md5HashAlgorithm y) 31 | 32 | prop_thDerivedHashStableUnique :: BlaFoo Char -> BlaFoo Char -> Bool 33 | prop_thDerivedHashStableUnique x y = 34 | (x == y) == (largeHashStable md5HashAlgorithm x == largeHashStable md5HashAlgorithm y) 35 | 36 | newtype Fool = Fool { unFool :: Bool } 37 | 38 | $(deriveLargeHashable ''Fool) 39 | 40 | -- | Simple sanity check for TH derived instances for newtypes. 41 | test_newtypeTHHashSane :: IO () 42 | test_newtypeTHHashSane = assertNotEqual (largeHash md5HashAlgorithm (Fool True)) 43 | (largeHash md5HashAlgorithm (Fool False)) 44 | 45 | test_newtypeTHHashStableSane :: IO () 46 | test_newtypeTHHashStableSane = assertNotEqual (largeHashStable md5HashAlgorithm (Fool True)) 47 | (largeHashStable md5HashAlgorithm (Fool False)) 48 | 49 | data HigherKinded t = HigherKinded (t String) 50 | 51 | instance LargeHashable' BlaFoo where 52 | updateHash' = updateHash 53 | updateHashStable' = updateHashStable 54 | 55 | instance LargeHashable' t => LargeHashable (HigherKinded t) where 56 | updateHash (HigherKinded x) = 57 | updateHash' x 58 | updateHashStable (HigherKinded x) = 59 | updateHashStable' x 60 | 61 | test_higherKinded :: IO () 62 | test_higherKinded = 63 | assertNotEqual 64 | (largeHash md5HashAlgorithm (HigherKinded (Bar 42 "Stefan"))) 65 | (largeHash md5HashAlgorithm (HigherKinded (Bar 5 "Stefan"))) 66 | 67 | test_higherKindedStable :: IO () 68 | test_higherKindedStable = 69 | assertNotEqual 70 | (largeHashStable md5HashAlgorithm (HigherKinded (Bar 42 "Stefan"))) 71 | (largeHashStable md5HashAlgorithm (HigherKinded (Bar 5 "Stefan"))) 72 | 73 | data GadtNoArgs where 74 | GadtNoArgsA :: Int -> Char -> GadtNoArgs 75 | GadtNoArgsB :: Integer -> GadtNoArgs 76 | 77 | $(deriveLargeHashable ''GadtNoArgs) 78 | 79 | test_gadtNoArgs :: IO () 80 | test_gadtNoArgs = 81 | assertNotEqual 82 | (largeHash md5HashAlgorithm (GadtNoArgsA 1 'a')) 83 | (largeHash md5HashAlgorithm (GadtNoArgsB 1)) 84 | 85 | data GadtOneArg a where 86 | GadtOneArgA :: Int -> Char -> GadtOneArg Integer 87 | GadtOneArgB :: Integer -> GadtOneArg Char 88 | 89 | $(deriveLargeHashable ''GadtOneArg) 90 | 91 | test_gadtOneArg :: IO () 92 | test_gadtOneArg = 93 | assertNotEqual 94 | (largeHash md5HashAlgorithm (GadtOneArgA 1 'a')) 95 | (largeHash md5HashAlgorithm (GadtOneArgB 1)) 96 | 97 | 98 | data GadtMultipleArgs a b c where 99 | GadtMultipleArgsA :: Int -> Char -> GadtMultipleArgs String Integer Int 100 | GadtMultipleArgsB :: Int -> GadtMultipleArgs Char String Integer 101 | 102 | $(deriveLargeHashable ''GadtMultipleArgs) 103 | 104 | test_gadtMultipleArgs :: IO () 105 | test_gadtMultipleArgs = 106 | assertNotEqual 107 | (largeHash md5HashAlgorithm (GadtMultipleArgsA (1::Int) 'a')) 108 | (largeHash md5HashAlgorithm (GadtMultipleArgsB (1::Int))) 109 | -------------------------------------------------------------------------------- /test/Main.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -F -pgmF htfpp #-} 2 | module Main where 3 | 4 | import Test.Framework 5 | 6 | -- In Emacs sort block with M-x sort-lines 7 | import {-@ HTF_TESTS @-} Data.LargeHashable.Tests.Class 8 | import {-@ HTF_TESTS @-} Data.LargeHashable.Tests.Inspection 9 | import {-@ HTF_TESTS @-} Data.LargeHashable.Tests.LargeWord 10 | import {-@ HTF_TESTS @-} Data.LargeHashable.Tests.MD5 11 | import {-@ HTF_TESTS @-} Data.LargeHashable.Tests.Stable 12 | import {-@ HTF_TESTS @-} Data.LargeHashable.Tests.TH 13 | 14 | allTests :: [TestSuite] 15 | allTests = htf_importedTests 16 | 17 | main :: IO () 18 | main = htfMain allTests 19 | --------------------------------------------------------------------------------