├── .github
    └── workflows
    │   └── haskell-ci.yml
├── .gitignore
├── .vscode
    └── settings.json
├── ChangeLog
├── LICENSE
├── README.md
├── Setup.hs
├── TODO
├── benchmark
    ├── Data
    │   └── LargeHashable
    │   │   └── Benchmarks
    │   │       ├── CryptoHash.hs
    │   │       ├── Main.hs
    │   │       ├── Serial.hs
    │   │       └── Text.hs
    └── Main.hs
├── cbits
    └── md5.h
├── default.nix
├── hie.yaml
├── large-hashable.cabal
├── large-hashable.nix
├── run-benchmarks.sh
├── run-with-profiling
├── scripts
    └── ci-check
├── src
    └── Data
    │   ├── LargeHashable.hs
    │   └── LargeHashable
    │       ├── Class.hs
    │       ├── Endianness.hs
    │       ├── Intern.hs
    │       ├── LargeWord.hs
    │       ├── MD5.hs
    │       └── TH.hs
├── stack-ghc-9.2.yaml
├── stack-ghc-9.2.yaml.lock
├── stack-ghc-9.4.yaml
├── stack-ghc-9.4.yaml.lock
├── stack-ghc-9.6.yaml
├── stack-ghc-9.6.yaml.lock
├── stack-ghc-9.8.yaml
├── stack-ghc-9.8.yaml.lock
├── stack.yaml
├── stack.yaml.lock
└── test
    ├── Data
        └── LargeHashable
        │   └── Tests
        │       ├── Class.hs
        │       ├── Helper.hs
        │       ├── Inspection.hs
        │       ├── LargeWord.hs
        │       ├── MD5.hs
        │       ├── Stable.hs
        │       └── TH.hs
    ├── Main.hs
    └── bigfile.txt


/.github/workflows/haskell-ci.yml:
--------------------------------------------------------------------------------
 1 | name: Haskell-CI
 2 | on:
 3 |   push:
 4 |     branches: [ master ]
 5 |   pull_request:
 6 |     branches: [ master ]
 7 |   workflow_dispatch:
 8 | jobs:
 9 |   linux:
10 |     name: Haskell-CI - Linux - ${{ matrix.compiler }}
11 |     runs-on: ubuntu-latest
12 |     timeout-minutes:
13 |       60
14 |     container:
15 |       image: skogsbaer/ubuntu:20240502224354
16 |     continue-on-error: false
17 |     strategy:
18 |       matrix:
19 |         include:
20 |           - compiler: ghc-9.8
21 |           - compiler: ghc-9.6
22 |           - compiler: ghc-9.4
23 |           - compiler: ghc-9.2
24 |       fail-fast: false
25 |     steps:
26 |       - name: Set PATH and environment variables
27 |         run: |
28 |           echo "LH_TEST_STACK_YAML=$GITHUB_WORKSPACE/stack-${{ matrix.compiler }}.yaml" >> "$GITHUB_ENV"
29 |       - name: checkout
30 |         uses: actions/checkout@v3
31 |       - name: Permissions bug workaround
32 |         run: "chown -R $(id -un):$(id -gn) ~"
33 |       - name: Compute cache key
34 |         run: |
35 |           lsb_release -a > cache-key 2> /dev/null
36 |           cat large-hashable.cabal $(which stack) "$LH_TEST_STACK_YAML" scripts/ci-check >> cache-key
37 |           LH_CACHE_KEY="$(md5sum cache-key | cut -c 1-32)"
38 |           echo "LH_CACHE_KEY=$LH_CACHE_KEY" >> "$GITHUB_ENV"
39 |           echo "LH_CACHE_KEY=$LH_CACHE_KEY"
40 |       - name: Cache stack dependencies
41 |         uses: actions/cache@v3
42 |         with:
43 |           path: |
44 |             ~/.stack
45 |             ~/.LH-stack-work
46 |           key: ${{ runner.OS }}-stack-cache-v1-${{ matrix.compiler }}-${{ env.LH_CACHE_KEY }}
47 |           restore-keys: |
48 |             ${{ runner.OS }}-stack-cache-v1-${{ matrix.compiler }}
49 |       - name: test
50 |         run: scripts/ci-check
51 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | dist
 2 | cabal-dev
 3 | *.o
 4 | *.hi
 5 | *.chi
 6 | *.chs.h
 7 | *.dyn_o
 8 | *.dyn_hi
 9 | .virtualenv
10 | .hpc
11 | .hsenv
12 | .cabal-sandbox/
13 | cabal.sandbox.config
14 | *.prof
15 | *.aux
16 | *.hp
17 | .HTF/
18 | .stack-work/
19 | dist-newstyle/
20 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "haskell.checkProject": false,
 3 |     "haskell.checkParents": false,
 4 |     "haskell.trace.client": "info",
 5 |     "haskell.openDocumentationInHackage": true,
 6 |     "haskell.openSourceInHackage": true,
 7 |     "haskell.sessionLoading": "multipleComponents",
 8 |     "haskell.plugin.hlint.codeActionsOn": false,
 9 |     "haskell.plugin.hlint.diagnosticsOn": false,
10 |     "haskell.plugin.semanticTokens.globalOn": true
11 | }
12 | 


--------------------------------------------------------------------------------
/ChangeLog:
--------------------------------------------------------------------------------
 1 | 0.1.2.0: 2025-01-13
 2 | 	* replace cryptonite (deprecated) with crypton
 3 | 
 4 | 0.1.1.0: 2023-05-19
 5 | 	* fix for text version 2. The fix causes hashes for text values to change. Use the new
 6 |           function `largeHashStable` if you want to avoid that. (`largeHashStable` comes
 7 |           with a performance penalty, though.)
 8 | 
 9 | 0.1.0.4: 2017-03-19
10 |         * fixed build on `i686` related to non-fixed size integers
11 | 
12 | 0.1.0.2: 2016-10-10
13 |         * fixed build problems because of TemplateHaskell 2.11
14 | 
15 | 0.1.0.1: 2016-08-10
16 |         * fixed build issue: added bitfn.h to cabal file
17 | 
18 | 0.1.0.0: 2016-08-09
19 |         * initial release
20 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015 - 2016 factis research GmbH
 2 | 
 3 | All rights reserved.
 4 | 
 5 | Redistribution and use in source and binary forms, with or without
 6 | modification, are permitted provided that the following conditions are met:
 7 | 
 8 |     * Redistributions of source code must retain the above copyright
 9 |       notice, this list of conditions and the following disclaimer.
10 | 
11 |     * Redistributions in binary form must reproduce the above
12 |       copyright notice, this list of conditions and the following
13 |       disclaimer in the documentation and/or other materials provided
14 |       with the distribution.
15 | 
16 |     * Neither the name of Author name here nor the names of other
17 |       contributors may be used to endorse or promote products derived
18 |       from this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # LargeHashable
 2 | 
 3 | [![BuildStatus](https://github.com/factisresearch/large-hashable/actions/workflows/haskell-ci.yml/badge.svg?branch=master)](https://github.com/factisresearch/large-hashable/actions/workflows/haskell-ci.yml/badge.svg?branch=master)
 4 | [![Hackage](https://img.shields.io/hackage/v/large-hashable.svg)](http://hackage.haskell.org/package/large-hashable)
 5 | 
 6 | Efficiently hash Haskell values with MD5, SHA256, SHA512 and other
 7 | hashing algorithms.
 8 | 
 9 | ## Install
10 | 
11 | * Using cabal: `cabal install large-hashable`
12 | * Using Stack: `stack install large-hashable`
13 | 
14 | ### Building from git repository
15 | 
16 | - clone the repository
17 | - Install the stack tool (http://haskellstack.org)
18 | - `stack build` builds the code
19 | - `stack test` builds the code and runs the tests
20 | - `run-benchmarks.sh` runs the benchmark suite
21 | 


--------------------------------------------------------------------------------
/Setup.hs:
--------------------------------------------------------------------------------
1 | import Distribution.Simple
2 | main = defaultMain
3 | 


--------------------------------------------------------------------------------
/TODO:
--------------------------------------------------------------------------------
1 | - Support for SHA256 and SHA512, maybe also for more hashing algorithms
2 | - Documentation
3 | 


--------------------------------------------------------------------------------
/benchmark/Data/LargeHashable/Benchmarks/CryptoHash.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE ExistentialQuantification #-}
 2 | {-# LANGUAGE MagicHash #-}
 3 | module Data.LargeHashable.Benchmarks.CryptoHash where
 4 | 
 5 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines")
 6 | import Data.Bits
 7 | import Data.Byteable
 8 | import Data.List (foldl')
 9 | import Data.Word
10 | import qualified Crypto.Hash as H
11 | import qualified Data.ByteString as BS
12 | import qualified Data.ByteString.Base16 as Base16
13 | import qualified Data.ByteString.Builder as B
14 | import qualified Data.ByteString.Char8 as BSC
15 | import qualified Data.ByteString.Lazy as BSL
16 | import qualified Data.Text as T
17 | import qualified Data.Text.Encoding as T
18 | 
19 | data HashAlgorithm
20 |     = MD5
21 |     | SHA256
22 |     | SHA512
23 |       deriving (Eq, Show)
24 | 
25 | data HashCtx = forall h . H.HashAlgorithm h => HashCtx !(H.Context h)
26 | 
27 | newtype Hash = Hash { unHash :: BS.ByteString }
28 |     deriving (Eq)
29 | 
30 | instance Show Hash where
31 |     show (Hash bs) =
32 |         BSC.unpack (Base16.encode bs)
33 | 
34 | hashMd5 :: LargeHashable h => h -> Hash
35 | hashMd5 h =
36 |     let ctx = hashInit MD5
37 |     in hashFinish (hashUpdate ctx h)
38 | 
39 | hashInit :: HashAlgorithm -> HashCtx
40 | hashInit alg =
41 |     case alg of
42 |       MD5 -> HashCtx (H.hashInit :: H.Context H.MD5)
43 |       SHA256 -> HashCtx (H.hashInit :: H.Context H.SHA256)
44 |       SHA512 -> HashCtx (H.hashInit :: H.Context H.SHA512)
45 | 
46 | hashFinish :: HashCtx -> Hash
47 | hashFinish (HashCtx x) = Hash (toBytes $ H.hashFinalize x)
48 | 
49 | updateFromBuilder :: HashCtx -> B.Builder -> HashCtx
50 | updateFromBuilder (HashCtx ctx) builder =
51 |     HashCtx (H.hashUpdates ctx (BSL.toChunks (B.toLazyByteString builder)))
52 | 
53 | class LargeHashable a where
54 |     hashUpdate :: HashCtx -> a -> HashCtx
55 | 
56 | instance LargeHashable BS.ByteString where
57 |     hashUpdate (HashCtx x) bs = HashCtx (H.hashUpdate x bs)
58 | 
59 | instance LargeHashable Int where
60 |     hashUpdate (HashCtx ctx) i =
61 |         -- we can make this faster by accessing the machine represenation of an int
62 |         let w = (fromIntegral (toInteger i)) :: Word64
63 |         in HashCtx (H.hashUpdate ctx (BS.pack [(fromIntegral (shiftR w 56) :: Word8)
64 |                                               ,(fromIntegral (shiftR w 48) :: Word8)
65 |                                               ,(fromIntegral (shiftR w 40) :: Word8)
66 |                                               ,(fromIntegral (shiftR w 32) :: Word8)
67 |                                               ,(fromIntegral (shiftR w 24) :: Word8)
68 |                                               ,(fromIntegral (shiftR w 16) :: Word8)
69 |                                               ,(fromIntegral (shiftR w  8) :: Word8)
70 |                                               ,(fromIntegral (w)           :: Word8)]))
71 | 
72 | instance LargeHashable T.Text where
73 |     hashUpdate (HashCtx ctx) t = HashCtx (H.hashUpdate ctx (T.encodeUtf8 t))
74 | 
75 | instance LargeHashable a => LargeHashable [a] where
76 |     hashUpdate ctx l =
77 |         foldl' hashUpdate ctx l
78 | 


--------------------------------------------------------------------------------
/benchmark/Data/LargeHashable/Benchmarks/Main.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE BangPatterns #-}
 2 | {-# LANGUAGE DeriveAnyClass #-}
 3 | {-# LANGUAGE DeriveGeneric #-}
 4 | {-# LANGUAGE OverloadedStrings #-}
 5 | {-# LANGUAGE TemplateHaskell #-}
 6 | module Data.LargeHashable.Benchmarks.Main (main) where
 7 | 
 8 | import Control.DeepSeq
 9 | import Criterion
10 | import Criterion.Main
11 | import Data.SafeCopy
12 | import Data.Serialize.Put
13 | import GHC.Generics
14 | import qualified Data.Bytes.Serial as S
15 | import qualified Data.LargeHashable as LH
16 | import qualified Data.LargeHashable.Class as LH
17 | import qualified Data.LargeHashable.Intern as LH
18 | import qualified Data.LargeHashable.Benchmarks.CryptoHash as CH
19 | import qualified Data.LargeHashable.Benchmarks.Serial as Serial
20 | import qualified Data.LargeHashable.Benchmarks.Text as TextBench
21 | import qualified Data.Text as T
22 | 
23 | data Patient
24 |     = Patient
25 |     { p_firstName :: !T.Text
26 |     , p_lastName :: !T.Text
27 |     , p_note :: !T.Text
28 |     , p_age :: !Int
29 |     }
30 |     deriving (Eq, Show, NFData, Generic)
31 | 
32 | $(deriveSafeCopy 0 'base ''Patient)
33 | $(LH.deriveLargeHashable ''Patient)
34 | 
35 | instance S.Serial Patient
36 | 
37 | instance CH.LargeHashable Patient where
38 |     hashUpdate ctx0 p =
39 |         let !ctx1 = CH.hashUpdate ctx0 (p_firstName p)
40 |             !ctx2 = CH.hashUpdate ctx1 (p_lastName p)
41 |             !ctx3 = CH.hashUpdate ctx2 (p_note p)
42 |             !ctx4 = CH.hashUpdate ctx3 (p_age p)
43 |         in ctx4
44 | 
45 | updateHashPatient :: Patient -> LH.LH ()
46 | updateHashPatient p =
47 |     {-# SCC "updateHash/LargHashable" #-}
48 |     do {-# SCC "updateHash/firstName" #-} LH.updateHash (p_firstName p)
49 |        {-# SCC "updateHash/lastName" #-} LH.updateHash (p_lastName p)
50 |        {-# SCC "updateHash/note" #-} LH.updateHash (p_note p)
51 |        {-# SCC "updateHash/age" #-} LH.updateHash (p_age p)
52 | 
53 | mkPatList :: Int -> [Patient]
54 | mkPatList n =
55 |     let l = map mkPat [1..n]
56 |     in l `deepseq` l
57 |     where
58 |       mkPat i =
59 |           Patient
60 |           { p_firstName = "Stefan"
61 |           , p_lastName = "Wehr"
62 |           , p_note = "Dies ist ein bißchen mehr Text, aber auch nicht richtig lang"
63 |           , p_age = i
64 |           }
65 | 
66 | _NUM_ :: Int
67 | _NUM_ = 100000
68 | 
69 | patList :: [Patient]
70 | patList = mkPatList _NUM_
71 | 
72 | main :: IO ()
73 | main =
74 |     defaultMain
75 |     [ env (return patList) $ \l ->
76 |         bgroup "patList"
77 |         [ bench "safecopy" $ whnf (CH.hashMd5 . runPut . safePut) l
78 |         , bench "cryptohash" $ whnf CH.hashMd5 l
79 |         , bench "large-hashable (Manual)" $ whnf (LH.runLH LH.md5HashAlgorithm . LH.updateHashList updateHashPatient) l
80 |         , bench "large-hashable (TH)" $ whnf (LH.largeHash LH.md5HashAlgorithm) l
81 |         , bench "large-hashable (Generic)" $ whnf (LH.runLH LH.md5HashAlgorithm . LH.genericUpdateHash) l
82 |         , bench "large-hashable-serial (TH)" $ whnf (Serial.serialLargeHash LH.md5HashAlgorithm) l
83 |         ]
84 |     , env TextBench.setup $ \ ~(text, string) ->
85 |         bgroup "text"
86 |         [ bench "text-utf8" $ whnf TextBench.benchTextUtf8 text
87 |         , bench "text-utf16" $ whnf TextBench.benchTextUtf16 text
88 |         , bench "text-unicode" $ whnf TextBench.benchTextUnicode text
89 |         , bench "string" $ whnf TextBench.benchTextString string
90 |         ]
91 |     ]
92 | 


--------------------------------------------------------------------------------
/benchmark/Data/LargeHashable/Benchmarks/Serial.hs:
--------------------------------------------------------------------------------
 1 | {-# OPTIONS_GHC -fno-warn-orphans #-}
 2 | module Data.LargeHashable.Benchmarks.Serial (serialLargeHash) where
 3 | 
 4 | import Data.LargeHashable.Intern
 5 | import Data.LargeHashable.Class
 6 | import Data.Bytes.Put
 7 | import Data.Bytes.Serial
 8 | 
 9 | serialLargeHash :: Serial a => HashAlgorithm h -> a -> h
10 | serialLargeHash algo a = runLH algo $ serialize a
11 | 
12 | instance MonadPut LH where
13 |     flush = return ()
14 | 
15 |     putWord8 = updateHash
16 |     putWord16host = updateHash
17 |     putWord32host = updateHash
18 |     putWord64host = updateHash
19 |     putWordhost = updateHash
20 | 
21 |     putWord16le = updateHash
22 |     putWord32le = updateHash
23 |     putWord64le = updateHash
24 | 
25 |     putWord16be = updateHash
26 |     putWord32be = updateHash
27 |     putWord64be = updateHash
28 | 
29 |     putByteString = updateHash
30 |     putLazyByteString = updateHash
31 | 


--------------------------------------------------------------------------------
/benchmark/Data/LargeHashable/Benchmarks/Text.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE BangPatterns #-}
 2 | {-# LANGUAGE CPP #-}
 3 | {-# LANGUAGE ScopedTypeVariables #-}
 4 | module Data.LargeHashable.Benchmarks.Text (
 5 |     setup,
 6 |     benchTextUtf8,
 7 |     benchTextUtf16,
 8 |     benchTextUnicode,
 9 |     benchTextString
10 | ) where
11 | 
12 | import Data.Char (ord)
13 | import qualified Data.Text as T
14 | import qualified Data.Text.Encoding as TE
15 | import qualified Data.Text.Foreign as TF
16 | import qualified Data.Text.IO as T
17 | import qualified Data.ByteString as BS
18 | import Data.LargeHashable
19 | import Data.LargeHashable.Intern
20 | import Data.Word
21 | import GHC.Ptr
22 | 
23 | setup :: IO (T.Text, String)
24 | setup = do
25 | #if MIN_VERSION_text(2,0,0)
26 |   text <- T.readFile "test/bigfile.txt"
27 |   let string = T.unpack text
28 |   return (length string `seq` (text, string))
29 | #else
30 |   fail "benchmark for text only available with text >= 2"
31 | #endif
32 | 
33 | -- Hash the native UTF8 encoding
34 | benchTextUtf8 :: T.Text -> MD5Hash
35 | benchTextUtf8 !t = runLH md5HashAlgorithm go
36 |   where
37 |     go = do
38 |       updates <- hashUpdates
39 |       ioInLH $ do
40 |         TF.useAsPtr t $ \(valPtr :: Ptr Word8) (units :: TF.I8) ->
41 |             hu_updatePtr updates (castPtr valPtr) (fromIntegral units)
42 |         hu_updateULong updates (fromIntegral (T.length t))
43 | 
44 | -- Hash the UTF16 encoding for backwards compat with text-1.*
45 | benchTextUtf16 :: T.Text -> MD5Hash
46 | benchTextUtf16 !t =
47 |   let b = TE.encodeUtf16BE t -- encoding of text-1.* (at least on mac with big endian)
48 |   in runLH md5HashAlgorithm (go b)
49 |   where
50 |     go b = do
51 |       updates <- hashUpdates
52 |       ioInLH $ do
53 |         ptr <- BS.useAsCString b return
54 |         hu_updatePtr updates (castPtr ptr) (BS.length b)
55 |         hu_updateULong updates (fromIntegral (T.length t))
56 | 
57 | benchTextUnicode :: T.Text -> MD5Hash
58 | benchTextUnicode !t = runLH md5HashAlgorithm go
59 |   where
60 |     go = do
61 |       updates <- hashUpdates
62 |       ioInLH $ do
63 |         hashStringNoLength updates (T.unpack t)
64 |         hu_updateULong updates (fromIntegral (T.length t))
65 | 
66 | hashStringNoLength :: HashUpdates -> String -> IO ()
67 | hashStringNoLength updates = loop
68 |   where
69 |     loop :: [Char] -> IO ()
70 |     loop [] = pure ()
71 |     loop (c:cs) = do
72 |        _ <- hu_updateUInt updates (c2w c)
73 |        loop cs
74 | 
75 | benchTextString:: String -> MD5Hash
76 | benchTextString s = runLH md5HashAlgorithm $ do
77 |   updates <- hashUpdates
78 |   ioInLH $ hashString updates s
79 | 
80 | hashString :: HashUpdates -> String -> IO ()
81 | hashString updates = loop 0
82 |   where
83 |     loop :: Int -> [Char] -> IO ()
84 |     loop !n [] = hu_updateULong updates (fromIntegral n)
85 |     loop !n (c:cs) = do
86 |        _ <- hu_updateUInt updates (c2w c)
87 |        loop (n+1) cs
88 | 
89 | {-# INLINE c2w #-}
90 | c2w :: Char -> Word32
91 | c2w c = toEnum (ord c :: Int)
92 | 


--------------------------------------------------------------------------------
/benchmark/Main.hs:
--------------------------------------------------------------------------------
1 | module Main where
2 | 
3 | import qualified Data.LargeHashable.Benchmarks.Main as M
4 | 
5 | main :: IO ()
6 | main = M.main
7 | 


--------------------------------------------------------------------------------
/cbits/md5.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (C) 2006-2009 Vincent Hanquez <vincent@snarc.org>
  3 |  *               2016      Herbert Valerio Riedel <hvr@gnu.org>
  4 |  *
  5 |  * Redistribution and use in source and binary forms, with or without
  6 |  * modification, are permitted provided that the following conditions
  7 |  * are met:
  8 |  * 1. Redistributions of source code must retain the above copyright
  9 |  *    notice, this list of conditions and the following disclaimer.
 10 |  * 2. Redistributions in binary form must reproduce the above copyright
 11 |  *    notice, this list of conditions and the following disclaimer in the
 12 |  *    documentation and/or other materials provided with the distribution.
 13 |  *
 14 |  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 15 |  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 16 |  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 17 |  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 18 |  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 19 |  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 20 |  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 21 |  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 22 |  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 23 |  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 24 |  */
 25 | 
 26 | #ifndef MD5_H
 27 | #define MD5_H
 28 | 
 29 | #include <stdint.h>
 30 | #include <stddef.h>
 31 | #include <assert.h>
 32 | #include <string.h>
 33 | #include <ghcautoconf.h>
 34 | 
 35 | struct md5_ctx
 36 | {
 37 | 	uint64_t sz;
 38 | 	uint8_t  buf[64];
 39 | 	uint32_t h[4];
 40 | };
 41 | 
 42 | #define MD5_DIGEST_SIZE		16
 43 | #define MD5_CTX_SIZE		88
 44 | 
 45 | static inline void md5_init(struct md5_ctx *ctx);
 46 | static inline void md5_update(struct md5_ctx *ctx, const uint8_t *data, size_t len);
 47 | static inline void md5_update_uchar(struct md5_ctx *ctx, uint8_t data);
 48 | static inline void md5_update_ushort(struct md5_ctx *ctx, uint16_t data);
 49 | static inline void md5_update_uint(struct md5_ctx *ctx, uint32_t data);
 50 | static inline void md5_update_ulong(struct md5_ctx *ctx, uint64_t data);
 51 | static inline uint64_t md5_finalize(struct md5_ctx *ctx, uint8_t *out);
 52 | 
 53 | #if defined(static_assert)
 54 | static_assert(sizeof(struct md5_ctx) == MD5_CTX_SIZE, "unexpected md5_ctx size");
 55 | #else
 56 | /* poor man's pre-C11 _Static_assert */
 57 | typedef char static_assertion__unexpected_md5_ctx_size[(sizeof(struct md5_ctx) == MD5_CTX_SIZE)?1:-1];
 58 | #endif
 59 | 
 60 | #define ptr_uint32_aligned(ptr) (!((uintptr_t)(ptr) & 0x3))
 61 | 
 62 | static inline uint32_t
 63 | rol32(const uint32_t word, const unsigned shift)
 64 | {
 65 |   /* GCC usually transforms this into a 'rol'-insn */
 66 |   return (word << shift) | (word >> (32 - shift));
 67 | }
 68 | 
 69 | static inline uint32_t
 70 | cpu_to_le32(const uint32_t hl)
 71 | {
 72 | #if !WORDS_BIGENDIAN
 73 |   return hl;
 74 | #elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
 75 |   return __builtin_bswap32(hl);
 76 | #else
 77 |   /* GCC usually transforms this into a bswap insn */
 78 |   return ((hl & 0xff000000) >> 24) |
 79 |          ((hl & 0x00ff0000) >> 8)  |
 80 |          ((hl & 0x0000ff00) << 8)  |
 81 |          ( hl               << 24);
 82 | #endif
 83 | }
 84 | 
 85 | static inline void
 86 | cpu_to_le32_array(uint32_t *dest, const uint32_t *src, unsigned wordcnt)
 87 | {
 88 |   while (wordcnt--)
 89 |     *dest++ = cpu_to_le32(*src++);
 90 | }
 91 | 
 92 | static inline uint64_t
 93 | cpu_to_le64(const uint64_t hll)
 94 | {
 95 | #if !WORDS_BIGENDIAN
 96 |   return hll;
 97 | #elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
 98 |   return __builtin_bswap64(hll);
 99 | #else
100 |   return ((uint64_t)cpu_to_le32(hll & 0xffffffff) << 32LL) | cpu_to_le32(hll >> 32);
101 | #endif
102 | }
103 | 
104 | 
105 | static inline void
106 | md5_init(struct md5_ctx *ctx)
107 | {
108 |   memset(ctx, 0, sizeof(*ctx));
109 | 
110 |   ctx->h[0] = 0x67452301;
111 |   ctx->h[1] = 0xefcdab89;
112 |   ctx->h[2] = 0x98badcfe;
113 |   ctx->h[3] = 0x10325476;
114 | }
115 | 
116 | #define f1(x, y, z)     (z ^ (x & (y ^ z)))
117 | #define f2(x, y, z)     f1(z, x, y)
118 | #define f3(x, y, z)     (x ^ y ^ z)
119 | #define f4(x, y, z)     (y ^ (x | ~z))
120 | #define R(f, a, b, c, d, i, k, s) a += f(b, c, d) + w[i] + k; a = rol32(a, s); a += b
121 | 
122 | static void
123 | md5_do_chunk_aligned(struct md5_ctx *ctx, const uint32_t w[])
124 | {
125 |   uint32_t a = ctx->h[0];
126 |   uint32_t b = ctx->h[1];
127 |   uint32_t c = ctx->h[2];
128 |   uint32_t d = ctx->h[3];
129 | 
130 |   R(f1, a, b, c, d,  0, 0xd76aa478,  7);
131 |   R(f1, d, a, b, c,  1, 0xe8c7b756, 12);
132 |   R(f1, c, d, a, b,  2, 0x242070db, 17);
133 |   R(f1, b, c, d, a,  3, 0xc1bdceee, 22);
134 |   R(f1, a, b, c, d,  4, 0xf57c0faf,  7);
135 |   R(f1, d, a, b, c,  5, 0x4787c62a, 12);
136 |   R(f1, c, d, a, b,  6, 0xa8304613, 17);
137 |   R(f1, b, c, d, a,  7, 0xfd469501, 22);
138 |   R(f1, a, b, c, d,  8, 0x698098d8,  7);
139 |   R(f1, d, a, b, c,  9, 0x8b44f7af, 12);
140 |   R(f1, c, d, a, b, 10, 0xffff5bb1, 17);
141 |   R(f1, b, c, d, a, 11, 0x895cd7be, 22);
142 |   R(f1, a, b, c, d, 12, 0x6b901122,  7);
143 |   R(f1, d, a, b, c, 13, 0xfd987193, 12);
144 |   R(f1, c, d, a, b, 14, 0xa679438e, 17);
145 |   R(f1, b, c, d, a, 15, 0x49b40821, 22);
146 | 
147 |   R(f2, a, b, c, d,  1, 0xf61e2562,  5);
148 |   R(f2, d, a, b, c,  6, 0xc040b340,  9);
149 |   R(f2, c, d, a, b, 11, 0x265e5a51, 14);
150 |   R(f2, b, c, d, a,  0, 0xe9b6c7aa, 20);
151 |   R(f2, a, b, c, d,  5, 0xd62f105d,  5);
152 |   R(f2, d, a, b, c, 10, 0x02441453,  9);
153 |   R(f2, c, d, a, b, 15, 0xd8a1e681, 14);
154 |   R(f2, b, c, d, a,  4, 0xe7d3fbc8, 20);
155 |   R(f2, a, b, c, d,  9, 0x21e1cde6,  5);
156 |   R(f2, d, a, b, c, 14, 0xc33707d6,  9);
157 |   R(f2, c, d, a, b,  3, 0xf4d50d87, 14);
158 |   R(f2, b, c, d, a,  8, 0x455a14ed, 20);
159 |   R(f2, a, b, c, d, 13, 0xa9e3e905,  5);
160 |   R(f2, d, a, b, c,  2, 0xfcefa3f8,  9);
161 |   R(f2, c, d, a, b,  7, 0x676f02d9, 14);
162 |   R(f2, b, c, d, a, 12, 0x8d2a4c8a, 20);
163 | 
164 |   R(f3, a, b, c, d,  5, 0xfffa3942,  4);
165 |   R(f3, d, a, b, c,  8, 0x8771f681, 11);
166 |   R(f3, c, d, a, b, 11, 0x6d9d6122, 16);
167 |   R(f3, b, c, d, a, 14, 0xfde5380c, 23);
168 |   R(f3, a, b, c, d,  1, 0xa4beea44,  4);
169 |   R(f3, d, a, b, c,  4, 0x4bdecfa9, 11);
170 |   R(f3, c, d, a, b,  7, 0xf6bb4b60, 16);
171 |   R(f3, b, c, d, a, 10, 0xbebfbc70, 23);
172 |   R(f3, a, b, c, d, 13, 0x289b7ec6,  4);
173 |   R(f3, d, a, b, c,  0, 0xeaa127fa, 11);
174 |   R(f3, c, d, a, b,  3, 0xd4ef3085, 16);
175 |   R(f3, b, c, d, a,  6, 0x04881d05, 23);
176 |   R(f3, a, b, c, d,  9, 0xd9d4d039,  4);
177 |   R(f3, d, a, b, c, 12, 0xe6db99e5, 11);
178 |   R(f3, c, d, a, b, 15, 0x1fa27cf8, 16);
179 |   R(f3, b, c, d, a,  2, 0xc4ac5665, 23);
180 | 
181 |   R(f4, a, b, c, d,  0, 0xf4292244,  6);
182 |   R(f4, d, a, b, c,  7, 0x432aff97, 10);
183 |   R(f4, c, d, a, b, 14, 0xab9423a7, 15);
184 |   R(f4, b, c, d, a,  5, 0xfc93a039, 21);
185 |   R(f4, a, b, c, d, 12, 0x655b59c3,  6);
186 |   R(f4, d, a, b, c,  3, 0x8f0ccc92, 10);
187 |   R(f4, c, d, a, b, 10, 0xffeff47d, 15);
188 |   R(f4, b, c, d, a,  1, 0x85845dd1, 21);
189 |   R(f4, a, b, c, d,  8, 0x6fa87e4f,  6);
190 |   R(f4, d, a, b, c, 15, 0xfe2ce6e0, 10);
191 |   R(f4, c, d, a, b,  6, 0xa3014314, 15);
192 |   R(f4, b, c, d, a, 13, 0x4e0811a1, 21);
193 |   R(f4, a, b, c, d,  4, 0xf7537e82,  6);
194 |   R(f4, d, a, b, c, 11, 0xbd3af235, 10);
195 |   R(f4, c, d, a, b,  2, 0x2ad7d2bb, 15);
196 |   R(f4, b, c, d, a,  9, 0xeb86d391, 21);
197 | 
198 |   ctx->h[0] += a;
199 |   ctx->h[1] += b;
200 |   ctx->h[2] += c;
201 |   ctx->h[3] += d;
202 | }
203 | 
204 | static void
205 | md5_do_chunk(struct md5_ctx *ctx, const uint8_t buf[])
206 | {
207 |   if (ptr_uint32_aligned(buf)) { /* aligned buf */
208 | #if WORDS_BIGENDIAN
209 |     uint32_t w[16]; cpu_to_le32_array(w, (const uint32_t *)buf, 16);
210 | #else
211 |     const uint32_t *w = (const uint32_t *)buf;
212 | #endif
213 |     md5_do_chunk_aligned(ctx, w);
214 |   } else { /* unaligned buf */
215 |     uint32_t w[16]; memcpy(w, buf, 64);
216 | #if WORDS_BIGENDIAN
217 |     cpu_to_le32_array(w, w, 16);
218 | #endif
219 |     md5_do_chunk_aligned(ctx, w);
220 |   }
221 | }
222 | 
223 | static inline void
224 | md5_update(struct md5_ctx *ctx, const uint8_t *data, size_t len)
225 | {
226 |   size_t index = ctx->sz & 0x3f;
227 |   const size_t to_fill = 64 - index;
228 | 
229 |   ctx->sz += len;
230 | 
231 |   /* process partial buffer if there's enough data to make a block */
232 |   if (index && len >= to_fill) {
233 |     memcpy(ctx->buf + index, data, to_fill);
234 |     md5_do_chunk(ctx, ctx->buf);
235 |     /* memset(ctx->buf, 0, 64); */
236 |     len -= to_fill;
237 |     data += to_fill;
238 |     index = 0;
239 |   }
240 | 
241 |   /* process as many 64-blocks as possible */
242 |   while (len >= 64) {
243 |     md5_do_chunk(ctx, data);
244 |     len -= 64;
245 |     data += 64;
246 |   }
247 | 
248 |   /* append data into buf */
249 |   if (len)
250 |     memcpy(ctx->buf + index, data, len);
251 | }
252 | 
253 | static inline void md5_update_uchar(struct md5_ctx *ctx, uint8_t data)
254 | {
255 |     md5_update(ctx, &data, sizeof(data));
256 | }
257 | 
258 | static inline void md5_update_ushort(struct md5_ctx *ctx, uint16_t data)
259 | {
260 |     md5_update(ctx, (uint8_t *)&data, sizeof(data));
261 | }
262 | 
263 | static inline void md5_update_uint(struct md5_ctx *ctx, uint32_t data)
264 | {
265 |     md5_update(ctx, (uint8_t *)&data, sizeof(data));
266 | }
267 | 
268 | static inline void md5_update_ulong(struct md5_ctx *ctx, uint64_t data)
269 | {
270 |     md5_update(ctx, (uint8_t *)&data, sizeof(data));
271 | }
272 | 
273 | static inline uint64_t
274 | md5_finalize(struct md5_ctx *ctx, uint8_t *out)
275 | {
276 |   static const uint8_t padding[64] = { 0x80, };
277 |   const uint64_t sz = ctx->sz;
278 | 
279 |   /* add padding and update data with it */
280 |   const uint64_t bits = cpu_to_le64(ctx->sz << 3);
281 | 
282 |   /* pad out to 56 */
283 |   const size_t index = (ctx->sz & 0x3f);
284 |   const size_t padlen = (index < 56) ? (56 - index) : ((64 + 56) - index);
285 |   md5_update(ctx, padding, padlen);
286 | 
287 |   /* append length */
288 |   md5_update(ctx, (const uint8_t *) &bits, sizeof(bits));
289 | 
290 |   /* output hash */
291 |   cpu_to_le32_array((uint32_t *) out, ctx->h, 4);
292 | 
293 |   return sz;
294 | }
295 | 
296 | #endif
297 | 


--------------------------------------------------------------------------------
/default.nix:
--------------------------------------------------------------------------------
1 | let pkgs = import <nixpkgs> { };
2 | in pkgs.haskellPackages.callPackage ./large-hashable.nix { }
3 | 


--------------------------------------------------------------------------------
/hie.yaml:
--------------------------------------------------------------------------------
 1 | cradle:
 2 |   stack:
 3 |     - path: "./src"
 4 |       component: "large-hashable:lib"
 5 | 
 6 |     - path: "./benchmark"
 7 |       component: "large-hashable:bench:large-hashable-benchmark"
 8 | 
 9 |     - path: "./test"
10 |       component: "large-hashable:test:large-hashable-test"
11 | 


--------------------------------------------------------------------------------
/large-hashable.cabal:
--------------------------------------------------------------------------------
  1 | name:                large-hashable
  2 | version:             0.1.2.0
  3 | synopsis:            Efficiently hash (large) Haskell values
  4 | description:         Please see README.md
  5 | homepage:            https://github.com/factisresearch/large-hashable
  6 | license:             BSD3
  7 | license-file:        LICENSE
  8 | author:              Stefan Wehr, Lukas Epple
  9 | maintainer:          Stefan Wehr
 10 | copyright:           2015 - 2017 factis research GmbH, 2018 - 2023 medilyse GmbH, 2024 - 2025 MEQO GmbH
 11 | category:            Web
 12 | build-type:          Simple
 13 | cabal-version:       2.0
 14 | extra-source-files:
 15 |     cbits/md5.h
 16 |     README.md
 17 |     ChangeLog
 18 |     default.nix
 19 |     stack.yaml
 20 |     test/bigfile.txt
 21 |     stack-ghc-9.2.yaml
 22 |     stack-ghc-9.4.yaml
 23 |     stack-ghc-9.6.yaml
 24 |     stack-ghc-9.8.yaml
 25 | 
 26 | library
 27 |   hs-source-dirs:      src
 28 |   include-dirs: cbits
 29 |   exposed-modules:     Data.LargeHashable
 30 |                      , Data.LargeHashable.Class
 31 |                      , Data.LargeHashable.MD5
 32 |                      , Data.LargeHashable.Intern
 33 |                      , Data.LargeHashable.LargeWord
 34 |                      , Data.LargeHashable.TH
 35 |   other-modules:       Data.LargeHashable.Endianness
 36 |   build-depends:       aeson
 37 |                      , base >= 4.8 && < 5
 38 |                      , base16-bytestring
 39 |                      , bytes
 40 |                      , bytestring
 41 |                      , containers
 42 |                      , crypton
 43 |                      , memory
 44 |                      , scientific
 45 |                      , strict
 46 |                      , template-haskell
 47 |                      , text
 48 |                      , time
 49 |                      , transformers
 50 |                      , unordered-containers
 51 |                      , vector
 52 |                      , void
 53 |   build-tool-depends:  cpphs:cpphs
 54 |   default-language:    Haskell2010
 55 |   ghc-options:         -optc -O3 -W -fwarn-unused-imports -fwarn-unused-binds
 56 |                        -fwarn-unused-matches -fwarn-unused-do-bind -fwarn-wrong-do-bind
 57 |                        -fno-warn-name-shadowing
 58 |                        -fwarn-missing-signatures -O2
 59 | 
 60 | benchmark large-hashable-benchmark
 61 |   type:                exitcode-stdio-1.0
 62 |   hs-source-dirs:      benchmark
 63 |   main-is:             Main.hs
 64 |   other-modules:       Data.LargeHashable.Benchmarks.CryptoHash
 65 |                      , Data.LargeHashable.Benchmarks.Main
 66 |                      , Data.LargeHashable.Benchmarks.Serial
 67 |                      , Data.LargeHashable.Benchmarks.Text
 68 |   ghc-options:         -optc -O3 -threaded -rtsopts -with-rtsopts=-N
 69 |                        -W -fwarn-unused-imports -fwarn-unused-binds
 70 |                        -fwarn-unused-matches -fwarn-unused-do-bind -fwarn-wrong-do-bind
 71 |                        -pgmP "cpphs --cpp" -rtsopts -threaded -funbox-strict-fields
 72 |                        -fwarn-missing-signatures -fno-warn-name-shadowing -O2
 73 |   build-depends:       base >= 4.8 && < 5
 74 |                      , base16-bytestring
 75 |                      , criterion
 76 |                      , large-hashable
 77 |                      , safecopy
 78 |                      , text
 79 |                      , deepseq
 80 |                      , cryptohash
 81 |                      , bytestring
 82 |                      , cereal
 83 |                      , byteable
 84 |                      , transformers
 85 |                      , bytes
 86 |   build-tool-depends:  cpphs:cpphs
 87 |   default-language:    Haskell2010
 88 | 
 89 | test-suite large-hashable-test
 90 |   type:                exitcode-stdio-1.0
 91 |   hs-source-dirs:      test
 92 |   main-is:             Main.hs
 93 |   build-depends:       aeson
 94 |                      , HTF
 95 |                      , QuickCheck
 96 |                      , base >= 4.8 && < 5
 97 |                      , bytes
 98 |                      , bytestring
 99 |                      , containers
100 |                      , hashable
101 |                      , large-hashable
102 |                      , scientific
103 |                      , strict
104 |                      , text
105 |                      , time
106 |                      , unordered-containers
107 |                      , vector
108 |                      , inspection-testing
109 |   ghc-options:         -optc -O3 -threaded -rtsopts -with-rtsopts=-N
110 |                        -W -fwarn-unused-imports -fwarn-unused-binds
111 |                        -fwarn-unused-matches -fwarn-unused-do-bind -fwarn-wrong-do-bind
112 |                        -pgmP "cpphs --cpp" -rtsopts -threaded -funbox-strict-fields
113 |                        -fwarn-missing-signatures -fno-warn-name-shadowing
114 |   build-tool-depends:  cpphs:cpphs
115 |   default-language:    Haskell2010
116 |   other-modules:       Data.LargeHashable.Tests.Class
117 |                      , Data.LargeHashable.Tests.Helper
118 |                      , Data.LargeHashable.Tests.Inspection
119 |                      , Data.LargeHashable.Tests.TH
120 |                      , Data.LargeHashable.Tests.LargeWord
121 |                      , Data.LargeHashable.Tests.MD5
122 |                      , Data.LargeHashable.Tests.Stable
123 | 
124 | source-repository head
125 |   type:     git
126 |   location: https://github.com/factisresearch/large-hashable
127 | 


--------------------------------------------------------------------------------
/large-hashable.nix:
--------------------------------------------------------------------------------
 1 | { mkDerivation, aeson, base, base16-bytestring, bytes, bytestring
 2 | , containers, hashable, HTF, QuickCheck, scientific, stdenv, strict
 3 | , template-haskell, text, time, transformers, unordered-containers
 4 | , utf8-light, vector, void
 5 | }:
 6 | mkDerivation {
 7 |   pname = "large-hashable";
 8 |   version = "0.1.0.3";
 9 |   src = ./.;
10 |   libraryHaskellDepends = [
11 |     aeson base base16-bytestring bytes bytestring containers scientific
12 |     strict template-haskell text time transformers unordered-containers
13 |     utf8-light vector void
14 |   ];
15 |   testHaskellDepends = [
16 |     aeson base bytes bytestring containers hashable HTF QuickCheck
17 |     scientific strict text time unordered-containers vector
18 |   ];
19 |   homepage = "https://github.com/factisresearch/large-hashable";
20 |   description = "Efficiently hash (large) Haskell values";
21 |   license = stdenv.lib.licenses.bsd3;
22 | }
23 | 


--------------------------------------------------------------------------------
/run-benchmarks.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | cd "$(dirname "$0")"
 4 | 
 5 | if type stack >/dev/null 2>&1
 6 | then
 7 |     echo "Compiling ..."
 8 |     stack build || exit 1
 9 |     echo "Done compiling"
10 |     exe=$(stack exec -- which large-hashable-benchmark)
11 | else
12 |     exe=./dist/build/large-hashable-benchmark/large-hashable-benchmark
13 |     echo "Not compiling, using executable $exe"
14 | fi
15 | 
16 | function run()
17 | {
18 |     echo
19 |     echo "Running benchmark $1"
20 |     $exe "$1" +RTS -s 2>&1 | egrep 'total memory in use|Total   time|Productivity|bytes allocated in the heap'
21 |     if ! test ${PIPESTATUS[0]} -eq 0; then
22 |         echo "Benchmark $1 failed!"
23 |         exit 1
24 |     fi
25 | }
26 | 
27 | run dry
28 | run safecopy
29 | run cryptohash
30 | run large-hashable-serial
31 | run large-hashable
32 | 


--------------------------------------------------------------------------------
/run-with-profiling:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$1" -o "$1" == "--help" ]; then
 4 |     echo "USAGE: $0 [--heap] BENCHMARK_NAME"
 5 |     exit 1
 6 | fi
 7 | 
 8 | prof=time
 9 | if [ "$1" == "--heap" ]; then
10 |     prof=heap
11 |     shift
12 | fi
13 | 
14 | stack build --executable-profiling --library-profiling --ghc-options=-fprof-auto
15 | 
16 | exe=$(stack exec -- which large-hashable-benchmark)
17 | 
18 | if [ "$prof" == "time" ]; then
19 |     $exe $1 +RTS -p
20 | else
21 |     $exe $1 +RTS -hd
22 | fi
23 | 


--------------------------------------------------------------------------------
/scripts/ci-check:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | if [ -z "$LH_TEST_STACK_YAML" ]; then
 4 |     echo > /dev/stderr "Environment variable LH_TEST_STACK_YAML not set"
 5 |     exit 1
 6 | fi
 7 | 
 8 | STACK_WORK=$HOME/.LH-stack-work
 9 | export LH_TEST_STACK_ARGS="--stack-yaml $LH_TEST_STACK_YAML"
10 | export LANG=C.UTF-8
11 | export LC_ALL=C.UTF-8
12 | echo "Environment variables:"
13 | env
14 | 
15 | STACK_OPTS="$LH_TEST_STACK_ARGS --allow-different-user"
16 | 
17 | echo -n "Working directory: "
18 | pwd
19 | 
20 | echo -n "Cached ~/.stack directory: "
21 | ls -l ~/.stack 2> /dev/stdout
22 | 
23 | echo -n "Cached $STACK_WORK directory: "
24 | ls -l $STACK_WORK 2> /dev/stdout
25 | 
26 | echo -n "Local .stack-work directory: "
27 | ls -l .stack-work 2> /dev/stdout
28 | 
29 | if [ -d $STACK_WORK -a ! -d .stack-work ]; then
30 |     echo "Restoring .stack-work from $STACK_WORK ..."
31 |     cp -r $STACK_WORK .stack-work
32 |     echo "Done restoring .stack-work from $STACK_WORK"
33 | fi
34 | 
35 | echo "Running stack test --only-dependencies ..."
36 | time stack $STACK_OPTS test --only-dependencies || exit 1
37 | echo "Done running stack test --only-dependencies"
38 | 
39 | echo "Saving .stack-work to $STACK_WORK ..."
40 | rm -rf $STACK_WORK
41 | cp -r .stack-work $STACK_WORK
42 | echo "Done saving .stack-work to $STACK_WORK"
43 | 
44 | echo "Running stack test ..."
45 | stack $STACK_OPTS test --haddock || exit 1
46 | echo "Done running stack test"
47 | 


--------------------------------------------------------------------------------
/src/Data/LargeHashable.hs:
--------------------------------------------------------------------------------
 1 | -- | This is the top-level module of LargeHashable, a library
 2 | --   for efficiently hashing any Haskell data type using a
 3 | --   hash algorithm like MD5, SHA256 etc.
 4 | --
 5 | --   Normal users should import this module.
 6 | module Data.LargeHashable (
 7 |    LargeHashable(..)
 8 |  , LargeHashable'(..)
 9 |  , LH
10 |  , HashAlgorithm
11 |  , largeHash
12 |  , largeHashStable
13 |  , deriveLargeHashable
14 |  , deriveLargeHashableNoCtx
15 |  , deriveLargeHashableCtx
16 |  , deriveLargeHashableCustomCtx
17 |  , MD5Hash(..)
18 |  , md5HashAlgorithm
19 |  , runMD5
20 |  , module Data.LargeHashable.LargeWord
21 | ) where
22 | 
23 | import Data.LargeHashable.Class
24 | import Data.LargeHashable.Intern
25 | import Data.LargeHashable.LargeWord
26 | import Data.LargeHashable.MD5
27 | import Data.LargeHashable.TH
28 | 


--------------------------------------------------------------------------------
/src/Data/LargeHashable/Class.hs:
--------------------------------------------------------------------------------
  1 | -- | This module defines the central type class `LargeHashable` of this package.
  2 | {-# LANGUAGE BangPatterns #-}
  3 | {-# LANGUAGE CPP #-}
  4 | {-# LANGUAGE DataKinds #-}
  5 | {-# LANGUAGE DefaultSignatures #-}
  6 | {-# LANGUAGE FlexibleContexts #-}
  7 | {-# LANGUAGE FlexibleInstances #-}
  8 | {-# LANGUAGE KindSignatures #-}
  9 | {-# LANGUAGE MultiParamTypeClasses #-}
 10 | {-# LANGUAGE RankNTypes #-}
 11 | {-# LANGUAGE ScopedTypeVariables #-}
 12 | {-# LANGUAGE TypeOperators #-}
 13 | module Data.LargeHashable.Class (
 14 | 
 15 |     LargeHashable(..), largeHash, largeHashStable, LargeHashable'(..), genericUpdateHash,
 16 |     updateHashList
 17 | ) where
 18 | 
 19 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines")
 20 | import Data.Bits
 21 | import Data.Char (ord)
 22 | import Data.Fixed
 23 | import Data.Foldable
 24 | import Data.Int
 25 | import Data.LargeHashable.Endianness
 26 | import Data.LargeHashable.Intern
 27 | import Data.Ratio
 28 | import Data.Time
 29 | import Data.Time.Clock.TAI
 30 | import Data.Void (Void)
 31 | import Data.Word
 32 | import Foreign.C.Types
 33 | import Foreign.Ptr
 34 | import GHC.Generics
 35 | import qualified Data.Aeson as J
 36 | #if MIN_VERSION_aeson(2,0,0)
 37 | import qualified Data.Aeson.Key as AesonKey
 38 | import qualified Data.Aeson.KeyMap as AesonKeyMap
 39 | #endif
 40 | import qualified Data.ByteString as B
 41 | import qualified Data.ByteString.Lazy as BL
 42 | import qualified Data.ByteString.Lazy.Internal as BLI
 43 | import qualified Data.ByteString.Short as BS
 44 | import qualified Data.Foldable as F
 45 | import qualified Data.HashMap.Lazy as HashMap
 46 | import qualified Data.HashSet as HashSet
 47 | import qualified Data.IntMap as IntMap
 48 | import qualified Data.IntSet as IntSet
 49 | import qualified Data.Map as M
 50 | import qualified Data.Scientific as Sci
 51 | import qualified Data.Sequence as Seq
 52 | import qualified Data.Set as S
 53 | import qualified Data.Strict.Tuple as Tuple
 54 | import qualified Data.Text as T
 55 | import qualified Data.Text.Encoding as TE
 56 | import qualified Data.Text.Foreign as TF
 57 | import qualified Data.Text.Internal.Lazy as TLI
 58 | import qualified Data.Text.Lazy as TL
 59 | import qualified Data.Vector as V
 60 | import Data.Kind (Type)
 61 | 
 62 | -- | A type class for computing hashes (i.e. MD5, SHA256, ...) from
 63 | -- haskell values.
 64 | --
 65 | -- The laws of this typeclass are the following:
 66 | --
 67 | -- (1) If two values are equal
 68 | -- according to '==', then the finally computed hashes must also be equal
 69 | -- according to '=='. However it is not required that the hashes of inequal
 70 | -- values have to be inequal. Also note that an instance of 'LargeHashable'
 71 | -- does not require a instance of 'Eq'. Using any sane algorithm the chance
 72 | -- of a collision should be 1 / n where n is the number of different hashes
 73 | -- possible.
 74 | --
 75 | -- (2) If two values are inequal
 76 | -- according to '==', then the probability of a hash collision is 1/n,
 77 | -- where n is the number of possible hashes produced by the
 78 | -- underlying hash algorithm.
 79 | --
 80 | -- A rule of thumb: hash all information that you would also need for
 81 | -- serializing/deserializing values of your datatype. For instance, when
 82 | -- hashing lists, you would not only hash the list elements but also the
 83 | -- length of the list. Consider the following datatype
 84 | --
 85 | -- > data Foo = Foo [Int] [Int]
 86 | --
 87 | -- We now write an instance for LargeHashable like this
 88 | --
 89 | -- > instance LargeHashable Foo where
 90 | -- >     updateHash (Foo l1 l2) = updateHash l1 >> updateHash l2
 91 | --
 92 | -- If we did not hash the length of a list, then the following two values
 93 | -- of @Foo@ would produce identical hashes:
 94 | --
 95 | -- > Foo [1,2,3] []
 96 | -- > Foo [1] [2,3]
 97 | --
 98 | class LargeHashable a where
 99 |     updateHash :: a -> LH ()
100 |     default updateHash :: (GenericLargeHashable (Rep a), Generic a) => a -> LH ()
101 |     updateHash = genericUpdateHash
102 |     updateHashStable :: a -> LH ()
103 |     default updateHashStable :: (GenericLargeHashable (Rep a), Generic a) => a -> LH ()
104 |     updateHashStable = genericUpdateHashStable
105 | 
106 | class LargeHashable' t where
107 |     updateHash' :: LargeHashable a => t a -> LH ()
108 |     updateHashStable' :: LargeHashable a => t a -> LH ()
109 | 
110 | -- | 'largeHash' is the central function of this package.
111 | --   For a given value it computes a 'Hash' using the given
112 | --   'HashAlgorithm'. The library tries to keep the
113 | --   hash values for @LargeHashable@ instances provided by
114 | --   library stable across releases, but there is no guarantee.
115 | --   See @largeHashStable&
116 | largeHash :: LargeHashable a => HashAlgorithm h -> a -> h
117 | largeHash algo x = runLH algo (updateHash x)
118 | 
119 | -- | 'largeHashStable' is similar to @largeHash@, but the hash
120 | --   value is guaranteed to remain stable across releases,
121 | --   even if this causes performance to degrade.
122 | largeHashStable :: LargeHashable a => HashAlgorithm h -> a -> h
123 | largeHashStable algo x = runLH algo (updateHashStable x)
124 | 
125 | {-# INLINE updateHashTextData #-}
126 | updateHashTextData :: T.Text -> LH ()
127 | updateHashTextData !t = do
128 |     updates <- hashUpdates
129 |     ioInLH $ do
130 | #if MIN_VERSION_text(2,0,0)
131 |         TF.useAsPtr t $ \(valPtr :: Ptr Word8) (units :: TF.I8) ->
132 |             hu_updatePtr updates (castPtr valPtr) (fromIntegral units)
133 | #else
134 |         -- UTF-16 encoding
135 |         TF.useAsPtr t $ \(valPtr :: Ptr Word16) (units :: TF.I16) ->
136 |             hu_updatePtr updates (castPtr valPtr) (fromIntegral (2 * units))
137 | #endif
138 |         return ()
139 | 
140 | {-# INLINE updateHashText #-}
141 | updateHashText :: T.Text -> LH ()
142 | updateHashText !t = do
143 |     updateHashTextData t
144 |     updates <- hashUpdates
145 |     ioInLH $ hu_updateULong updates (fromIntegral (T.length t))
146 | 
147 | {-# INLINE updateHashStableTextData #-}
148 | updateHashStableTextData :: T.Text -> LH ()
149 | updateHashStableTextData t = do
150 |     let bs =
151 |             case systemEndianness of
152 |                 LittleEndian -> TE.encodeUtf16LE t
153 |                 BigEndian -> TE.encodeUtf16BE t
154 |     updateHashByteStringData bs
155 | 
156 | {-# INLINE updateHashStableText #-}
157 | updateHashStableText :: T.Text -> LH ()
158 | updateHashStableText t = do
159 |     updateHashStableTextData t
160 |     updates <- hashUpdates
161 |     ioInLH $ hu_updateULong updates (fromIntegral (T.length t))
162 | 
163 | instance LargeHashable T.Text where
164 |     updateHash = updateHashText
165 |     updateHashStable = updateHashStableText
166 | 
167 | {-# INLINE updateHashLazyText #-}
168 | updateHashLazyText :: Int -> TL.Text -> LH ()
169 | updateHashLazyText !len (TLI.Chunk !t !next) = do
170 |     updateHashTextData t
171 |     updateHashLazyText (len + T.length t) next
172 | updateHashLazyText !len TLI.Empty = updateHash len
173 | 
174 | {-# INLINE updateHashStableLazyText #-}
175 | updateHashStableLazyText :: Int -> TL.Text -> LH ()
176 | updateHashStableLazyText !len (TLI.Chunk !t !next) = do
177 |     updateHashStableTextData t
178 |     updateHashStableLazyText (len + T.length t) next
179 | updateHashStableLazyText !len TLI.Empty = updateHash len
180 | 
181 | instance LargeHashable TL.Text where
182 |     updateHash = updateHashLazyText 0
183 |     updateHashStable = updateHashStableLazyText 0
184 | 
185 | {-# INLINE updateHashByteStringData #-}
186 | updateHashByteStringData :: B.ByteString -> LH ()
187 | updateHashByteStringData !b = do
188 |     updates <- hashUpdates
189 |     ioInLH $ do
190 |         ptr <- B.useAsCString b return
191 |         hu_updatePtr updates (castPtr ptr) (B.length b)
192 | 
193 | {-# INLINE updateHashByteString #-}
194 | updateHashByteString :: B.ByteString -> LH ()
195 | updateHashByteString !b = do
196 |     updateHashByteStringData b
197 |     updates <- hashUpdates
198 |     ioInLH $ hu_updateULong updates (fromIntegral (B.length b))
199 | 
200 | instance LargeHashable B.ByteString where
201 |     updateHash = updateHashByteString
202 |     updateHashStable = updateHash
203 | 
204 | {-# INLINE updateHashLazyByteString #-}
205 | updateHashLazyByteString :: Int -> BL.ByteString -> LH ()
206 | updateHashLazyByteString !len (BLI.Chunk !bs !next) = do
207 |     updateHashByteStringData bs
208 |     updateHashLazyByteString (len + B.length bs) next
209 | updateHashLazyByteString !len BLI.Empty = updateHash len
210 | 
211 | instance LargeHashable BL.ByteString where
212 |     updateHash = updateHashLazyByteString 0
213 |     updateHashStable = updateHash
214 | 
215 | instance LargeHashable BS.ShortByteString where
216 |     updateHash = updateHash . BS.fromShort
217 |     updateHashStable = updateHash
218 | 
219 | {-# INLINE updateHashWithFun #-}
220 | updateHashWithFun :: (HashUpdates -> a -> IO ()) -> a -> LH ()
221 | updateHashWithFun f x =
222 |     do updates <- hashUpdates
223 |        ioInLH $ f updates x
224 | 
225 | instance LargeHashable Int where
226 |     updateHash = updateHashWithFun hu_updateULong . fromIntegral
227 |     updateHashStable = updateHash
228 | 
229 | instance LargeHashable Int8 where
230 |     updateHash = updateHashWithFun hu_updateUChar . fromIntegral
231 |     updateHashStable = updateHash
232 | 
233 | instance LargeHashable Int16 where
234 |     updateHash = updateHashWithFun hu_updateUShort . fromIntegral
235 |     updateHashStable = updateHash
236 | 
237 | instance LargeHashable Int32 where
238 |     updateHash = updateHashWithFun hu_updateUInt . fromIntegral
239 |     updateHashStable = updateHash
240 | 
241 | instance LargeHashable Int64 where
242 |     updateHash = updateHashWithFun hu_updateULong . fromIntegral
243 |     updateHashStable = updateHash
244 | 
245 | instance LargeHashable Word where
246 |     updateHash = updateHashWithFun hu_updateULong . fromIntegral
247 |     updateHashStable = updateHash
248 | 
249 | instance LargeHashable Word8 where
250 |     updateHash = updateHashWithFun hu_updateUChar
251 |     updateHashStable = updateHash
252 | 
253 | instance LargeHashable Word16 where
254 |     updateHash = updateHashWithFun hu_updateUShort
255 |     updateHashStable = updateHash
256 | 
257 | instance LargeHashable Word32 where
258 |     updateHash = updateHashWithFun hu_updateUInt
259 |     updateHashStable = updateHash
260 | 
261 | instance LargeHashable Word64 where
262 |     updateHash = updateHashWithFun hu_updateULong . fromIntegral
263 |     updateHashStable = updateHash
264 | 
265 | instance LargeHashable CChar where
266 |     updateHash (CChar i) = updateHashWithFun hu_updateUChar (fromIntegral i)
267 |     updateHashStable = updateHash
268 | 
269 | instance LargeHashable CShort where
270 |     updateHash (CShort i) = updateHashWithFun hu_updateUShort (fromIntegral i)
271 |     updateHashStable = updateHash
272 | 
273 | instance LargeHashable CInt where
274 |     updateHash (CInt i) = updateHashWithFun hu_updateUInt (fromIntegral i)
275 |     updateHashStable = updateHash
276 | 
277 | instance LargeHashable CLong where
278 |     updateHash (CLong i) = updateHashWithFun hu_updateULong (fromIntegral i)
279 |     updateHashStable = updateHash
280 | 
281 | instance LargeHashable CUChar where
282 |     updateHash (CUChar w) = updateHashWithFun hu_updateUChar w
283 |     updateHashStable = updateHash
284 | 
285 | instance LargeHashable CUShort where
286 |     updateHash (CUShort w) = updateHashWithFun hu_updateUShort w
287 |     updateHashStable = updateHash
288 | 
289 | instance LargeHashable CUInt where
290 |     updateHash (CUInt w) = updateHashWithFun hu_updateUInt w
291 |     updateHashStable = updateHash
292 | 
293 | instance LargeHashable CULong where
294 |     updateHash (CULong w) = updateHashWithFun hu_updateULong (fromIntegral w)
295 |     updateHashStable = updateHash
296 | 
297 | instance LargeHashable Char where
298 |     updateHash = updateHashWithFun hu_updateUInt . c2w
299 |     updateHashStable = updateHash
300 | 
301 | c2w :: Char -> Word32
302 | {-# INLINE c2w #-}
303 | c2w c = toEnum (ord c :: Int)
304 | 
305 | {-# INLINE updateHashInteger #-}
306 | updateHashInteger :: Integer -> LH ()
307 | updateHashInteger !i
308 |     | i == 0 = updateHash (0 :: CUChar)
309 |     | i > 0  = do
310 |         updateHash (fromIntegral (i .&. 0xffffffffffffffff) :: CULong)
311 |         updateHashInteger (shift i (-64))
312 |     | otherwise = do
313 |         updateHash (0 :: CUChar) -- prepend 0 to show it is negative
314 |         updateHashInteger (abs i)
315 | 
316 | instance LargeHashable Integer where
317 |     updateHash = updateHashInteger
318 |     updateHashStable = updateHash
319 | 
320 | foreign import ccall doubleToWord64 :: Double -> Word64
321 | 
322 | instance LargeHashable Double where
323 |     updateHash = updateHash . doubleToWord64
324 |     updateHashStable = updateHash
325 | 
326 | foreign import ccall floatToWord32 :: Float -> Word32
327 | 
328 | instance LargeHashable Float where
329 |     updateHash = updateHash . floatToWord32
330 |     updateHashStable = updateHash
331 | 
332 | {-# INLINE updateHashFixed #-}
333 | updateHashFixed :: HasResolution a => Fixed a -> LH ()
334 | updateHashFixed f = updateHash (truncate . (* f) . fromInteger $ resolution f :: Integer)
335 | 
336 | instance HasResolution a => LargeHashable (Fixed a) where
337 |     updateHash = updateHashFixed
338 |     updateHashStable = updateHash
339 | 
340 | {-# INLINE updateHashBool #-}
341 | updateHashBool :: Bool -> LH ()
342 | updateHashBool True  = updateHash (1 :: CUChar)
343 | updateHashBool False = updateHash (0 :: CUChar)
344 | 
345 | instance LargeHashable Bool where
346 |     updateHash = updateHashBool
347 |     updateHashStable = updateHash
348 | 
349 | {-# INLINE updateHashList #-}
350 | updateHashList :: forall a. (a -> LH ()) -> [a] -> LH ()
351 | updateHashList f = loop 0
352 |     where
353 |       loop :: Int -> [a] -> LH ()
354 |       loop !i [] =
355 |           updateHash i
356 |       loop !i (x:xs) = do
357 |           f x
358 |           loop (i + 1) xs
359 | 
360 | instance LargeHashable a => LargeHashable [a] where
361 |     updateHash = updateHashList updateHash
362 |     updateHashStable = updateHashList updateHashStable
363 | 
364 | {-# INLINE setFoldFun #-}
365 | setFoldFun :: LargeHashable a => (a -> LH ()) -> LH () -> a -> LH ()
366 | setFoldFun f action value = action >> f value
367 | 
368 | {-# INLINE updateHashSet #-}
369 | updateHashSet :: LargeHashable a => (a -> LH ()) -> S.Set a -> LH ()
370 | updateHashSet !f !set = do
371 |     foldl' (setFoldFun f) (return ()) set -- Note: foldl' for sets traverses the elements in asc order
372 |     updateHash (S.size set)
373 | 
374 | instance LargeHashable a => LargeHashable (S.Set a) where
375 |     updateHash = updateHashSet updateHash
376 |     updateHashStable = updateHashSet updateHashStable
377 | 
378 | {-# INLINE updateHashIntSet #-}
379 | updateHashIntSet :: IntSet.IntSet -> LH ()
380 | updateHashIntSet !set = do
381 |     IntSet.foldl' (setFoldFun updateHash) (return ()) set
382 |     updateHash (IntSet.size set)
383 | 
384 | -- Lazy and Strict IntSet share the same definition
385 | instance LargeHashable IntSet.IntSet where
386 |     updateHash = updateHashIntSet
387 |     updateHashStable = updateHash
388 | 
389 | {-# INLINE updateHashHashSet #-}
390 | updateHashHashSet :: LargeHashable a => (a -> LH ()) -> HashSet.HashSet a -> LH ()
391 | updateHashHashSet !f !set =
392 |     -- The ordering of elements in a set does not matter. A HashSet does not
393 |     -- offer an efficient way of exctracting its elements in some specific
394 |     -- ordering. So we use the auxiliary function 'hashListModuloOrdering'.
395 |     hashListModuloOrdering f (HashSet.size set) (HashSet.toList set)
396 | 
397 | -- | Hashes a list of values such the two permutations of the same list
398 | -- yields the same hash.
399 | hashListModuloOrdering :: LargeHashable a => (a -> LH ()) -> Int -> [a] -> LH ()
400 | hashListModuloOrdering f len list =
401 |     do updateXorHash (map f list)
402 |        updateHash len
403 | 
404 | -- Lazy and Strict HashSet share the same definition
405 | instance LargeHashable a => LargeHashable (HashSet.HashSet a) where
406 |     updateHash = updateHashHashSet updateHash
407 |     updateHashStable = updateHashHashSet updateHashStable
408 | 
409 | {-# INLINE mapFoldFun #-}
410 | mapFoldFun :: (LargeHashable k, LargeHashable a) =>
411 |     (k -> LH ()) -> (a -> LH ()) -> LH () -> k -> a -> LH ()
412 | mapFoldFun kf vf action key value = action >> kf key >> vf value
413 | 
414 | {-# INLINE updateHashMap #-}
415 | updateHashMap :: (LargeHashable k, LargeHashable a) =>
416 |     (k -> LH ()) -> (a -> LH ()) -> M.Map k a -> LH ()
417 | updateHashMap !kf !vf !m = do
418 |         M.foldlWithKey' (mapFoldFun kf vf) (return ()) m
419 |         updateHash (M.size m)
420 | 
421 | -- Lazy and Strict Map share the same definition
422 | instance (LargeHashable k, LargeHashable a) => LargeHashable (M.Map k a) where
423 |     updateHash = updateHashMap updateHash updateHash
424 |     updateHashStable = updateHashMap updateHashStable updateHashStable
425 | 
426 | {-# INLINE updateHashIntMap #-}
427 | updateHashIntMap :: LargeHashable a => (a -> LH ()) -> IntMap.IntMap a -> LH ()
428 | updateHashIntMap !f !m = do
429 |     IntMap.foldlWithKey' (mapFoldFun updateHash f) (return ()) m
430 |     updateHash (IntMap.size m)
431 | 
432 | -- Lazy and Strict IntMap share the same definition
433 | instance LargeHashable a => LargeHashable (IntMap.IntMap a) where
434 |     updateHash = updateHashIntMap updateHash
435 |     updateHashStable = updateHashIntMap updateHashStable
436 | 
437 | updateHashHashMap :: (LargeHashable k, LargeHashable v) =>
438 |     ((k, v) -> LH ()) -> HashMap.HashMap k v -> LH ()
439 | updateHashHashMap !f !m =
440 |     -- The ordering of elements in a map do not matter. A HashMap does not
441 |     -- offer an efficient way of exctracting its elements in some specific
442 |     -- ordering. So we use the auxiliary function 'hashListModuloOrdering'.
443 |     hashListModuloOrdering f (HashMap.size m) (HashMap.toList m)
444 | 
445 | -- Lazy and Strict HashMap share the same definition
446 | instance (LargeHashable k, LargeHashable v) => LargeHashable (HashMap.HashMap k v) where
447 |     updateHash = updateHashHashMap updateHash
448 |     updateHashStable = updateHashHashMap updateHashStable
449 | 
450 | instance (LargeHashable a, LargeHashable b) => LargeHashable (a, b) where
451 |     updateHash (!a, !b) = updateHash a >> updateHash b
452 |     updateHashStable (!a, !b) = updateHashStable a >> updateHashStable b
453 | 
454 | instance (LargeHashable a, LargeHashable b, LargeHashable c) => LargeHashable (a, b, c) where
455 |     updateHash (a, b, c) = updateHash a >> updateHash b >> updateHash c
456 |     updateHashStable (a, b, c) = updateHashStable a >> updateHashStable b >> updateHashStable c
457 | 
458 | instance (LargeHashable a, LargeHashable b, LargeHashable c, LargeHashable d) => LargeHashable (a, b, c, d) where
459 |     updateHash (a, b, c, d) = updateHash a >> updateHash b >> updateHash c >> updateHash d
460 |     updateHashStable (a, b, c, d) =
461 |         updateHashStable a >> updateHashStable b >> updateHashStable c >> updateHashStable d
462 | 
463 | instance (LargeHashable a, LargeHashable b, LargeHashable c, LargeHashable d, LargeHashable e) => LargeHashable (a, b, c, d, e) where
464 |     updateHash (a, b, c, d, e) =
465 |         updateHash a >> updateHash b >> updateHash c >> updateHash d >> updateHash e
466 |     updateHashStable (a, b, c, d, e) =
467 |         updateHashStable a >> updateHashStable b >> updateHashStable c >> updateHashStable d >> updateHashStable e
468 | 
469 | instance LargeHashable a => LargeHashable (Maybe a) where
470 |     updateHash Nothing   = updateHash (0 :: CULong)
471 |     updateHash (Just !x) = updateHash (1 :: CULong) >> updateHash x
472 |     updateHashStable Nothing   = updateHash (0 :: CULong)
473 |     updateHashStable (Just !x) = updateHash (1 :: CULong) >> updateHashStable x
474 | 
475 | instance (LargeHashable a, LargeHashable b) => LargeHashable (Either a b) where
476 |     updateHash (Left !l)  = updateHash (0 :: CULong) >> updateHash l
477 |     updateHash (Right !r) = updateHash (1 :: CULong) >> updateHash r
478 |     updateHashStable (Left !l)  = updateHash (0 :: CULong) >> updateHashStable l
479 |     updateHashStable (Right !r) = updateHash (1 :: CULong) >> updateHashStable r
480 | 
481 | instance LargeHashable () where
482 |     updateHash () = return ()
483 |     updateHashStable () = return ()
484 | 
485 | instance LargeHashable Ordering where
486 |     updateHash EQ = updateHash (0  :: CULong)
487 |     updateHash GT = updateHash (-1 :: CULong)
488 |     updateHash LT = updateHash (1  :: CULong)
489 |     updateHashStable = updateHash
490 | 
491 | instance (Integral a, LargeHashable a) => LargeHashable (Ratio a) where
492 |     updateHash !i = do
493 |         updateHash $ numerator i
494 |         updateHash $ denominator i
495 |     updateHashStable = updateHash
496 | 
497 | instance LargeHashable AbsoluteTime where
498 |     updateHash t = updateHash $ diffAbsoluteTime t taiEpoch
499 |     updateHashStable = updateHash
500 | 
501 | instance LargeHashable DiffTime where
502 |     -- could be replaced by diffTimeToPicoseconds as soon as
503 |     -- time 1.6 becomes more common
504 |     updateHash = updateHash . (fromRational . toRational :: DiffTime -> Pico)
505 |     updateHashStable = updateHash
506 | 
507 | instance LargeHashable NominalDiffTime where
508 |     updateHash = updateHash . (fromRational . toRational :: NominalDiffTime -> Pico)
509 |     updateHashStable = updateHash
510 | 
511 | instance LargeHashable LocalTime where
512 |     updateHash (LocalTime d tod) = updateHash d >> updateHash tod
513 |     updateHashStable = updateHash
514 | 
515 | instance LargeHashable ZonedTime where
516 |     updateHash (ZonedTime lt tz) = updateHash lt >> updateHash tz
517 |     updateHashStable = updateHash
518 | 
519 | instance LargeHashable TimeOfDay where
520 |     updateHash (TimeOfDay h m s) = updateHash h >> updateHash m >> updateHash s
521 |     updateHashStable = updateHash
522 | 
523 | instance LargeHashable TimeZone where
524 |     updateHash (TimeZone mintz summerOnly name) =
525 |         updateHash mintz >> updateHash summerOnly >> updateHash name
526 |     updateHashStable (TimeZone mintz summerOnly name) =
527 |         updateHashStable mintz >> updateHashStable summerOnly >> updateHashStable name
528 | 
529 | instance LargeHashable UTCTime where
530 |     updateHash (UTCTime d dt) = updateHash d >> updateHash dt
531 |     updateHashStable = updateHash
532 | 
533 | instance LargeHashable Day where
534 |     updateHash (ModifiedJulianDay d) = updateHash d
535 |     updateHashStable = updateHash
536 | 
537 | instance LargeHashable UniversalTime where
538 |     updateHash (ModJulianDate d) = updateHash d
539 |     updateHashStable = updateHash
540 | 
541 | instance LargeHashable a => LargeHashable (V.Vector a) where
542 |     updateHash = updateHash . V.toList
543 |     updateHashStable = updateHashStable . V.toList
544 | 
545 | instance (LargeHashable a, LargeHashable b) => LargeHashable (Tuple.Pair a b) where
546 |     updateHash (x Tuple.:!: y) =
547 |         do updateHash x
548 |            updateHash y
549 |     updateHashStable (x Tuple.:!: y) =
550 |         do updateHashStable x
551 |            updateHashStable y
552 | 
553 | instance LargeHashable Sci.Scientific where
554 |     updateHash notNormalized =
555 |         do let n = Sci.normalize notNormalized
556 |            updateHash (Sci.coefficient n)
557 |            updateHash (Sci.base10Exponent n)
558 |     updateHashStable = updateHash
559 | 
560 | updateHashJson :: (forall a . LargeHashable a => a -> LH ()) -> J.Value -> LH ()
561 | updateHashJson f v =
562 |     case v of
563 |         J.Object obj ->
564 |             do updateHash (0::Int)
565 |                f obj
566 |         J.Array arr ->
567 |             do updateHash (1::Int)
568 |                f arr
569 |         J.String t ->
570 |             do updateHash (2::Int)
571 |                f t
572 |         J.Number n ->
573 |             do updateHash (3::Int)
574 |                f n
575 |         J.Bool b ->
576 |             do updateHash (4::Int)
577 |                f b
578 |         J.Null ->
579 |             updateHash (5::Int)
580 | 
581 | instance LargeHashable J.Value where
582 |     updateHash = updateHashJson updateHash
583 |     updateHashStable = updateHashJson updateHashStable
584 | 
585 | #if MIN_VERSION_aeson(2,0,0)
586 | instance LargeHashable J.Key where
587 |     updateHash = updateHash . AesonKey.toText
588 |     updateHashStable = updateHashStable . AesonKey.toText
589 | 
590 | instance LargeHashable v => LargeHashable (AesonKeyMap.KeyMap v) where
591 |     updateHash v = updateHash (AesonKeyMap.toHashMap v)
592 |     updateHashStable v = updateHashStable (AesonKeyMap.toHashMap v)
593 | #endif
594 | 
595 | instance LargeHashable Void where
596 |     updateHash _ = error "I'm void"
597 | 
598 | instance LargeHashable a => LargeHashable (Seq.Seq a) where
599 |     updateHash = updateHash . F.toList
600 |     updateHashStable = updateHashStable . F.toList
601 | 
602 | genericUpdateHash :: (Generic a, GenericLargeHashable (Rep a)) => a -> LH ()
603 | genericUpdateHash = updateHashGeneric . from
604 | {-# INLINE genericUpdateHash #-}
605 | 
606 | genericUpdateHashStable :: (Generic a, GenericLargeHashable (Rep a)) => a -> LH ()
607 | genericUpdateHashStable = updateHashStableGeneric . from
608 | {-# INLINE genericUpdateHashStable #-}
609 | 
610 | -- | Support for generically deriving 'LargeHashable' instances.
611 | -- Any instance of the type class 'GHC.Generics.Generic' can be made
612 | -- an instance of 'LargeHashable' by an empty instance declaration.
613 | class GenericLargeHashable f where
614 |     updateHashGeneric :: f p -> LH ()
615 |     updateHashStableGeneric :: f p -> LH ()
616 | 
617 | instance GenericLargeHashable V1 where
618 |     {-# INLINE updateHashGeneric #-}
619 |     updateHashGeneric = undefined
620 |     updateHashStableGeneric = undefined
621 | 
622 | instance GenericLargeHashable U1 where
623 |     {-# INLINE updateHashGeneric #-}
624 |     updateHashGeneric U1 = updateHash ()
625 |     updateHashStableGeneric U1 = updateHashStable ()
626 | 
627 | instance (GenericLargeHashable f, GenericLargeHashable g) => GenericLargeHashable (f :*: g) where
628 |     {-# INLINE updateHashGeneric #-}
629 |     updateHashGeneric (x :*: y) = updateHashGeneric x >> updateHashGeneric y
630 |     updateHashStableGeneric (x :*: y) = updateHashStableGeneric x >> updateHashStableGeneric y
631 | 
632 | instance (GenericLargeHashable f, GenericLargeHashableSum g) => GenericLargeHashable (f :+: g) where
633 |     {-# INLINE updateHashGeneric #-}
634 |     updateHashGeneric x = updateHashGenericSum x 0
635 |     updateHashStableGeneric x = updateHashStableGenericSum x 0
636 | 
637 | instance LargeHashable c => GenericLargeHashable (K1 i c) where
638 |     {-# INLINE updateHashGeneric #-}
639 |     updateHashGeneric x = updateHash (unK1 x)
640 |     updateHashStableGeneric x = updateHashStable (unK1 x)
641 | 
642 | -- ignore meta-info (for now)
643 | instance (GenericLargeHashable f) => GenericLargeHashable (M1 i t f) where
644 |     {-# INLINE updateHashGeneric #-}
645 |     updateHashGeneric x = updateHashGeneric (unM1 x)
646 |     updateHashStableGeneric x = updateHashStableGeneric (unM1 x)
647 | 
648 | class GenericLargeHashableSum (f :: Type -> Type) where
649 |     updateHashGenericSum :: f p -> Int -> LH ()
650 |     updateHashStableGenericSum :: f p -> Int -> LH ()
651 | 
652 | instance (GenericLargeHashable f, GenericLargeHashableSum g)
653 |     => GenericLargeHashableSum (f :+: g) where
654 |     {-# INLINE updateHashGenericSum #-}
655 |     updateHashGenericSum (L1 x) !p = do
656 |         updateHash p
657 |         updateHashGeneric x
658 |     updateHashGenericSum (R1 x) !p = updateHashGenericSum x (p+1)
659 |     updateHashStableGenericSum (L1 x) !p = do
660 |         updateHashStable p
661 |         updateHashStableGeneric x
662 |     updateHashStableGenericSum (R1 x) !p = updateHashStableGenericSum x (p+1)
663 | 
664 | instance (GenericLargeHashable f) => GenericLargeHashableSum (M1 i t f) where
665 |     {-# INLINE updateHashGenericSum #-}
666 |     updateHashGenericSum x !p = do
667 |         updateHash p
668 |         updateHashGeneric (unM1 x)
669 |     updateHashStableGenericSum x !p = do
670 |         updateHashStable p
671 |         updateHashStableGeneric (unM1 x)
672 | 


--------------------------------------------------------------------------------
/src/Data/LargeHashable/Endianness.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE CPP #-}
 2 | module Data.LargeHashable.Endianness (
 3 | 
 4 |     Endianness(..), systemEndianness
 5 | 
 6 | ) where
 7 | 
 8 | -- taken from the cpu package (last release 2013)
 9 | 
10 | -- | represent the CPU endianness
11 | --
12 | -- Big endian system stores bytes with the MSB as the first byte.
13 | -- Little endian system stores bytes with the LSB as the first byte.
14 | --
15 | -- middle endian is purposely avoided.
16 | data Endianness = LittleEndian
17 |                 | BigEndian
18 |                 deriving (Show,Eq)
19 | 
20 | -- | return the system endianness
21 | systemEndianness :: Endianness
22 | #ifdef WORDS_BIGENDIAN
23 | systemEndianness = BigEndian
24 | #else
25 | systemEndianness = LittleEndian
26 | #endif
27 | 


--------------------------------------------------------------------------------
/src/Data/LargeHashable/Intern.hs:
--------------------------------------------------------------------------------
  1 | -- | Generic, low-level data types for hashing. This is an internal module.
  2 | --
  3 | -- You should only import this module if you write your own hash algorithm
  4 | -- or if you need access to low-level hashing functions when defining
  5 | -- instances of 'LargeHash'.
  6 | --
  7 | -- Regular users should not import this module. Import 'Data.LargeHashable'
  8 | -- instead.
  9 | {-# LANGUAGE BangPatterns #-}
 10 | {-# LANGUAGE DeriveGeneric #-}
 11 | {-# LANGUAGE GeneralizedNewtypeDeriving #-}
 12 | module Data.LargeHashable.Intern (
 13 | 
 14 |     HashUpdates(..), HashAlgorithm(..), LH
 15 |   , hashUpdates, ioInLH, runLH, updateXorHash
 16 | 
 17 | ) where
 18 | 
 19 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines")
 20 | import Control.Monad
 21 | import Data.Word
 22 | import Foreign.Ptr
 23 | import System.IO.Unsafe (unsafePerformIO)
 24 | 
 25 | -- | Functions for updating an intermediate hash value. The functions live
 26 | -- in the 'IO' monad because they are typically implemented via FFI.
 27 | data HashUpdates
 28 |     = HashUpdates
 29 |     { hu_updatePtr :: {-# NOUNPACK #-} !(Ptr Word8 -> Int -> IO ()) -- ^ adds a byte array to the hash
 30 |     , hu_updateUChar :: {-# NOUNPACK #-} !(Word8 -> IO ())    -- Word8
 31 |     , hu_updateUShort :: {-# NOUNPACK #-} !(Word16 -> IO ())  -- Word16
 32 |     , hu_updateUInt :: {-# NOUNPACK #-} !(Word32 -> IO ())      -- Word32
 33 |     , hu_updateULong :: {-# NOUNPACK #-} !(Word64 -> IO ())    -- Word64
 34 |     }
 35 | 
 36 | -- | The interface for a hashing algorithm. The interface contains a simple run
 37 | -- function, which is used to update the hash with all values needed, and the
 38 | -- outputs the resulting hash.
 39 | data HashAlgorithm h
 40 |     = HashAlgorithm
 41 |     { ha_run :: {-# NOUNPACK #-} !((HashUpdates -> IO ()) -> IO h)
 42 |     , ha_xor :: {-# NOUNPACK #-} !(h -> h -> h)
 43 |     , ha_updateHash :: {-# NOUNPACK #-} !(HashUpdates -> h -> IO ())
 44 |     }
 45 | 
 46 | data LHEnv
 47 |     = LHEnv
 48 |     { lh_updates :: {-# NOUNPACK #-} !HashUpdates
 49 |     , lh_updateXorHash :: {-# NOUNPACK #-} !([LH ()] -> IO ())
 50 |     }
 51 | 
 52 | -- | The 'LH' monad (`LH` stands for "large hash") is used in the definition of
 53 | -- hashing functions for arbitrary data types.
 54 | newtype LH a = LH (LHEnv -> IO a)
 55 | 
 56 | {-# INLINE lhFmap #-}
 57 | lhFmap :: (a -> b) -> LH a -> LH b
 58 | lhFmap f (LH x) =
 59 |     LH $ \env ->
 60 |     do y <- x env
 61 |        return (f y)
 62 | 
 63 | {-# INLINE lhReturn #-}
 64 | lhReturn :: a -> LH a
 65 | lhReturn x = LH $ \_env -> return x
 66 | 
 67 | {-# INLINE lhApp #-}
 68 | lhApp :: LH (a -> b) -> LH a -> LH b
 69 | lhApp (LH f) (LH x) =
 70 |     LH $ \env -> f env <*> x env
 71 | 
 72 | {-# INLINE lhBind #-}
 73 | lhBind :: LH a -> (a -> LH b) -> LH b
 74 | lhBind (LH x) f =
 75 |     LH $ \env ->
 76 |     do y <- x env
 77 |        let (LH g) = f y
 78 |        g env
 79 | 
 80 | {-# INLINE lhBind' #-}
 81 | lhBind' :: LH a -> LH b -> LH b
 82 | lhBind' (LH x) (LH y) =
 83 |     LH $ \env ->
 84 |     do _ <- x env
 85 |        y env
 86 | 
 87 | instance Functor LH where
 88 |     fmap = lhFmap
 89 | 
 90 | instance Applicative LH where
 91 |     pure = lhReturn
 92 |     (<*>) = lhApp
 93 |     (*>) = lhBind'
 94 | 
 95 | instance Monad LH where
 96 |     (>>=) = lhBind
 97 | 
 98 | {-# INLINE hashUpdates #-}
 99 | hashUpdates :: LH HashUpdates
100 | hashUpdates =
101 |     LH $ \env -> return (lh_updates env)
102 | 
103 | {-# INLINE getUpdateXorHash #-}
104 | getUpdateXorHash :: LH ([LH ()] -> IO ())
105 | getUpdateXorHash =
106 |     LH $ \env -> return (lh_updateXorHash env)
107 | 
108 | -- | Perform an 'IO' action in the 'LH' monad. Use with care, do not perform
109 | -- arbitrary 'IO' operation with this function! Only use it for calling
110 | -- functions of the 'HashUpdates' datatype.
111 | {-# INLINE ioInLH #-}
112 | ioInLH :: IO a -> LH a
113 | ioInLH io =
114 |     LH $ \_env -> io
115 | 
116 | -- | Runs a 'LH' computation and returns the resulting hash.
117 | {-# NOINLINE runLH #-}
118 | runLH :: HashAlgorithm h -> LH () -> h
119 | runLH alg lh =
120 |     unsafePerformIO (runLH' alg lh)
121 | 
122 | runLH' :: HashAlgorithm h -> LH () -> IO h
123 | runLH' alg (LH lh) =
124 |     ha_run alg fun
125 |     where
126 |       fun updates =
127 |           lh (LHEnv updates (updateXor updates))
128 |       updateXor updates actions =
129 |           do mh <- foldM foldFun Nothing actions
130 |              case mh of
131 |                Just h -> ha_updateHash alg updates h
132 |                Nothing -> return ()
133 |       foldFun mh action =
134 |           do h2 <- runLH' alg action
135 |              case mh of
136 |                Nothing -> return (Just h2)
137 |                Just h1 ->
138 |                    let !h = ha_xor alg h1 h2
139 |                    in return (Just h)
140 | 
141 | updateXorHash :: [LH ()] -> LH ()
142 | updateXorHash actions =
143 |     do f <- getUpdateXorHash
144 |        ioInLH (f actions)
145 | 


--------------------------------------------------------------------------------
/src/Data/LargeHashable/LargeWord.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE DeriveDataTypeable #-}
 2 | {-# LANGUAGE DeriveGeneric #-}
 3 | -- | Efficient representation for small bytearrays with 128 or 256 bits.
 4 | module Data.LargeHashable.LargeWord
 5 |     ( Word128(..), Word256(..)
 6 |     , bsToW128, w128ToBs
 7 |     , bsToW256, w256ToBs
 8 |     , xorW128, xorW256
 9 |     )
10 | where
11 | 
12 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines")
13 | import Data.Bits
14 | import Data.Data (Data)
15 | import Data.Typeable
16 | import Data.Word
17 | import GHC.Generics (Generic)
18 | import qualified Data.ByteString as BS
19 | 
20 | data Word128
21 |     = Word128
22 |     { w128_first :: !Word64
23 |     , w128_second :: !Word64
24 |     }
25 |     deriving (Show, Read, Eq, Ord, Typeable, Generic, Data)
26 | 
27 | data Word256
28 |     = Word256
29 |     { w256_first :: !Word128
30 |     , w256_second :: !Word128
31 |     }
32 |     deriving (Show, Read, Eq, Ord, Typeable, Generic, Data)
33 | 
34 | -- | Converts a 'ByteString' into a 'Word256'. Only the first 32 bytes
35 | -- are taken into account, the rest is ignored.
36 | bsToW256 :: BS.ByteString -> Word256
37 | bsToW256 bs = Word256 first128 next128
38 |     where
39 |       first128 = bsToW128 bs
40 |       next128 = bsToW128 (BS.drop 16 bs)
41 | 
42 | w256ToBs :: Word256 -> BS.ByteString
43 | w256ToBs (Word256 first128 next128) =
44 |     w128ToBs first128 `BS.append` w128ToBs next128
45 | 
46 | -- | Converts a 'ByteString' into a 'Word128'. Only the first 16 bytes
47 | -- are taken into account, the rest is ignored.
48 | bsToW128 :: BS.ByteString -> Word128
49 | bsToW128 bs = Word128 first64 next64
50 |     where
51 |       first64 = bsToW64 bs
52 |       next64 = bsToW64 (BS.drop 8 bs)
53 | 
54 | w128ToBs :: Word128 -> BS.ByteString
55 | w128ToBs (Word128 first64 next64) =
56 |     w64ToBs first64 `BS.append` w64ToBs next64
57 | 
58 | w64ToBs :: Word64 -> BS.ByteString
59 | w64ToBs w64 =
60 |     BS.pack
61 |     [ fromIntegral (w64 `shiftR` 56 .&. 255)
62 |     , fromIntegral (w64 `shiftR` 48 .&. 255)
63 |     , fromIntegral (w64 `shiftR` 40 .&. 255)
64 |     , fromIntegral (w64 `shiftR` 32 .&. 255)
65 |     , fromIntegral (w64 `shiftR` 24 .&. 255)
66 |     , fromIntegral (w64 `shiftR` 16 .&. 255)
67 |     , fromIntegral (w64 `shiftR` 8 .&. 255)
68 |     , fromIntegral (w64 .&. 255)
69 |     ]
70 | 
71 | bsToW64 :: BS.ByteString -> Word64
72 | bsToW64 = BS.foldl' (\x w8 -> (x `shiftL` 8) + fromIntegral w8) 0 . BS.take 8
73 | 
74 | xorW128 :: Word128 -> Word128 -> Word128
75 | xorW128 (Word128 w11 w12) (Word128 w21 w22) = Word128 (w11 `xor` w21) (w12 `xor` w22)
76 | 
77 | xorW256 :: Word256 -> Word256 -> Word256
78 | xorW256 (Word256 w11 w12) (Word256 w21 w22) = Word256 (w11 `xorW128` w21) (w12 `xorW128` w22)
79 | 


--------------------------------------------------------------------------------
/src/Data/LargeHashable/MD5.hs:
--------------------------------------------------------------------------------
  1 | -- | An implementation of 'HashAlgorithm' for MD5 (https://www.ietf.org/rfc/rfc1321.txt).
  2 | {-# LANGUAGE BangPatterns #-}
  3 | {-# LANGUAGE ScopedTypeVariables #-}
  4 | {-# LANGUAGE CApiFFI #-}
  5 | module Data.LargeHashable.MD5 (
  6 | 
  7 |     MD5Hash(..), md5HashAlgorithm, runMD5
  8 | 
  9 | ) where
 10 | 
 11 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines")
 12 | import Data.LargeHashable.Intern
 13 | import Data.LargeHashable.LargeWord
 14 | import Data.Word
 15 | import Foreign.Marshal.Alloc
 16 | import Foreign.Ptr
 17 | import Foreign.Storable
 18 | import qualified Data.ByteString.Base16 as Base16
 19 | import qualified Data.ByteString.Char8 as BSC
 20 | 
 21 | newtype MD5Hash = MD5Hash { unMD5Hash :: Word128 }
 22 |     deriving (Eq, Ord)
 23 | 
 24 | instance Show MD5Hash where
 25 |     show (MD5Hash w) =
 26 |         BSC.unpack (Base16.encode (w128ToBs w))
 27 | 
 28 | instance Read MD5Hash where
 29 |     -- readsPrec :: Read a => Int -> String -> [(a, String)]
 30 |     readsPrec _ s =
 31 |         let n = 32
 32 |             (prefix, suffix) = splitAt n s
 33 |         in
 34 |             if length prefix /= n
 35 |                 then []
 36 |                 else
 37 |                     case Base16.decode (BSC.pack prefix) of
 38 |                         Left _ -> []
 39 |                         Right bs -> [(MD5Hash (bsToW128 bs), suffix)]
 40 | 
 41 | foreign import capi unsafe "md5.h md5_init"
 42 |     c_md5_init :: Ptr RawCtx -> IO ()
 43 | 
 44 | foreign import capi unsafe "md5.h md5_update"
 45 |     c_md5_update :: Ptr RawCtx -> Ptr Word8 -> Int -> IO ()
 46 | 
 47 | foreign import capi unsafe "md5.h md5_update_uchar"
 48 |     c_md5_update_uchar :: Ptr RawCtx -> Word8 -> IO ()
 49 | 
 50 | foreign import capi unsafe "md5.h md5_update_ushort"
 51 |     c_md5_update_ushort :: Ptr RawCtx -> Word16 -> IO ()
 52 | 
 53 | foreign import capi unsafe "md5.h md5_update_uint"
 54 |     c_md5_update_uint :: Ptr RawCtx -> Word32 -> IO ()
 55 | 
 56 | foreign import capi unsafe "md5.h md5_update_ulong"
 57 |     c_md5_update_ulong :: Ptr RawCtx -> Word64 -> IO ()
 58 | 
 59 | foreign import capi unsafe "md5.h md5_finalize"
 60 |     c_md5_finalize :: Ptr RawCtx -> Ptr Word8 -> IO ()
 61 | 
 62 | {-# INLINE digestSize #-}
 63 | digestSize :: Int
 64 | digestSize = 16
 65 | 
 66 | {-# INLINE sizeCtx #-}
 67 | sizeCtx :: Int
 68 | sizeCtx = 96
 69 | 
 70 | data RawCtx -- phantom type argument
 71 | 
 72 | newtype Ctx = Ctx { _unCtx :: Ptr RawCtx }
 73 | 
 74 | withCtx :: (Ctx -> IO ()) -> IO MD5Hash
 75 | withCtx f =
 76 |     allocaBytes sizeCtx $ \(ptr :: Ptr RawCtx) ->
 77 |     do c_md5_init ptr
 78 |        f (Ctx ptr)
 79 |        allocaBytes digestSize $ \(resPtr :: Ptr Word8) ->
 80 |            do c_md5_finalize ptr resPtr
 81 |               let first = castPtr resPtr :: Ptr Word64
 82 |               w1 <- peek first
 83 |               let second = castPtr (plusPtr resPtr (sizeOf w1)) :: Ptr Word64
 84 |               w2 <- peek second
 85 |               return (MD5Hash (Word128 w1 w2))
 86 | 
 87 | md5HashAlgorithm :: HashAlgorithm MD5Hash
 88 | md5HashAlgorithm =
 89 |     HashAlgorithm
 90 |     { ha_run = run
 91 |     , ha_xor = xorMD5
 92 |     , ha_updateHash = updateHash
 93 |     }
 94 |     where
 95 |       xorMD5 (MD5Hash h1) (MD5Hash h2) = MD5Hash (h1 `xorW128` h2)
 96 |       updateHash updates (MD5Hash h) =
 97 |           let f = hu_updateULong updates
 98 |           in do f (w128_first h)
 99 |                 f (w128_second h)
100 |       run f =
101 |           withCtx $ \(Ctx ctxPtr) ->
102 |               let !updates =
103 |                       HashUpdates
104 |                       { hu_updatePtr = c_md5_update ctxPtr
105 |                       , hu_updateUChar = c_md5_update_uchar ctxPtr
106 |                       , hu_updateUShort = c_md5_update_ushort ctxPtr
107 |                       , hu_updateUInt = c_md5_update_uint ctxPtr
108 |                       , hu_updateULong = c_md5_update_ulong ctxPtr
109 |                       }
110 |               in f updates
111 | 
112 | runMD5 :: LH () -> MD5Hash
113 | runMD5 = runLH md5HashAlgorithm
114 | 


--------------------------------------------------------------------------------
/src/Data/LargeHashable/TH.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE CPP             #-}
  2 | {-# LANGUAGE QuasiQuotes     #-}
  3 | {-# LANGUAGE TemplateHaskell #-}
  4 | module Data.LargeHashable.TH (
  5 | 
  6 |     deriveLargeHashable, deriveLargeHashableCtx, deriveLargeHashableNoCtx
  7 |   , deriveLargeHashableCustomCtx
  8 | 
  9 | ) where
 10 | 
 11 | import Control.Arrow (first)
 12 | import Control.Monad (forM)
 13 | import Data.LargeHashable.Class
 14 | import Data.Word
 15 | import Language.Haskell.TH
 16 | 
 17 | -- | Template Haskell function to automatically derive
 18 | --   instances of 'LargeHashable'. The derived instances first
 19 | --   calls 'updateHash' with an unique identifier number for
 20 | --   every constructor, followed by 'updateHash' calls for every
 21 | --   field of the constructor (if existent). It also works for
 22 | --   type families.
 23 | --
 24 | --   E. g. for the following code
 25 | --
 26 | --   @
 27 | --
 28 | -- data BlaFoo a = Foo
 29 | --               | Bar Int a
 30 | --               | Baz a a
 31 | --
 32 | -- $(deriveLargeHashable ''BlaFoo)
 33 | --   @
 34 | --
 35 | --   The following instance gets generated:
 36 | --
 37 | --   @
 38 | -- instance LargeHashable a_apg8 =>
 39 | --         LargeHashable (BlaFoo a_apg8) where
 40 | --  updateHash Foo = updateHash (0 :: Foreign.C.Types.CULong)
 41 | --  updateHash (Bar a b)
 42 | --    = (((updateHash (1 :: Foreign.C.Types.CULong)) >> (updateHash a))
 43 | --       >> (updateHash b))
 44 | --  updateHash (XY a b)
 45 | --    = (((updateHash (2 :: Foreign.C.Types.CULong)) >> (updateHash a))
 46 | --       >> (updateHash b))
 47 | --    @
 48 | deriveLargeHashable :: Name -> Q [Dec]
 49 | deriveLargeHashable n = reify n >>= \info ->
 50 |     case info of
 51 |         TyConI dec ->
 52 |             case dec of
 53 | #if MIN_VERSION_template_haskell(2,11,0)
 54 |                 DataD context name tyvars _ cons _ ->
 55 | #else
 56 |                 DataD context name tyvars cons _ ->
 57 | #endif
 58 |                     buildInstance (ConT name) context tyvars cons
 59 | 
 60 | #if MIN_VERSION_template_haskell(2,11,0)
 61 |                 NewtypeD context name tyvars _ con _  ->
 62 | #else
 63 |                 NewtypeD context name tyvars con _  ->
 64 | #endif
 65 |                     buildInstance (ConT name) context tyvars [con]
 66 |                 _ -> fail $ notDeriveAbleErrorMsg n info
 67 |         FamilyI _ instDecs -> fmap concat $ forM instDecs $ \instDec ->
 68 |             case instDec of
 69 | #if MIN_VERSION_template_haskell(2,15,0)
 70 |                 DataInstD context _ ty _ cons _ ->
 71 | #elif MIN_VERSION_template_haskell(2,11,0)
 72 |                 DataInstD context name types _ cons _ -> let ty = foldl AppT (ConT name) types in
 73 | #else
 74 |                 DataInstD context name types cons _ -> let ty = foldl AppT (ConT name) types in
 75 | #endif
 76 |                     buildInstance ty context [] cons
 77 | #if MIN_VERSION_template_haskell(2,15,0)
 78 |                 NewtypeInstD context _ ty _ con _ ->
 79 | #elif MIN_VERSION_template_haskell(2,11,0)
 80 |                 NewtypeInstD context name types _ con _ -> let ty = foldl AppT (ConT name) types in
 81 | #else
 82 |                 NewtypeInstD context name types con _ -> let ty = foldl AppT (ConT name) types in
 83 | #endif
 84 |                     buildInstance ty context [] [con]
 85 |                 _ -> fail $ notDeriveAbleErrorMsg n info
 86 |         _ -> fail $ notDeriveAbleErrorMsg n info
 87 | 
 88 | -- | Derive a 'LargeHashable' instance with extra constraints in the
 89 | -- context of the instance.
 90 | deriveLargeHashableCtx ::
 91 |        Name
 92 |     -> ([TypeQ] -> [PredQ])
 93 |        -- ^ Function mapping the type variables in the instance head to the additional constraints
 94 |     -> Q [Dec]
 95 | deriveLargeHashableCtx tyName extraPreds =
 96 |     deriveLargeHashableCustomCtx tyName mkCtx
 97 |     where
 98 |       mkCtx args oldCtx =
 99 |           oldCtx ++ extraPreds args
100 | 
101 | -- | Derive a 'LargeHashable' instance with no constraints in the context of the instance.
102 | deriveLargeHashableNoCtx ::
103 |        Name
104 |     -> (Q [Dec])
105 | deriveLargeHashableNoCtx tyName =
106 |     deriveLargeHashableCustomCtx tyName (\_ _ -> [])
107 | 
108 | -- | Derive a 'LargeHashable' instance with a completely custom instance context.
109 | deriveLargeHashableCustomCtx ::
110 |        Name
111 |     -> ([TypeQ] -> [PredQ] -> [PredQ])
112 |        -- ^ Function mapping the type variables in the instance head and the
113 |        -- constraints that would normally be generated to the constraints
114 |        -- that should be generated.
115 |     -> (Q [Dec])
116 | deriveLargeHashableCustomCtx tyName extraPreds =
117 |     do decs <- deriveLargeHashable tyName
118 |        case decs of
119 | #if MIN_VERSION_template_haskell(2,11,0)
120 |          (InstanceD overlap ctx ty body : _) ->
121 | #else
122 |          (InstanceD ctx ty body : _) ->
123 | #endif
124 |              do let args = reverse (collectArgs ty)
125 |                 newCtx <- sequence (extraPreds (map return args) (map return ctx))
126 |                 -- _ <- fail ("args: " ++ show args ++", ty: " ++ show ty)
127 | #if MIN_VERSION_template_haskell(2,11,0)
128 |                 return [InstanceD overlap newCtx ty body]
129 | #else
130 |                 return [InstanceD newCtx ty body]
131 | #endif
132 |          _ ->
133 |              error $
134 |                  "Unexpected declarations returned by deriveLargeHashable: " ++ show (ppr decs)
135 |     where
136 |       collectArgs :: Type -> [Type]
137 |       collectArgs outerTy =
138 |           let loop ty =
139 |                   case ty of
140 |                     (AppT l r) ->
141 |                         case l of
142 |                           AppT _ _ -> r : loop l
143 |                           _ -> [r]
144 |                     _ -> []
145 |           in case outerTy of
146 |                AppT _ r -> loop r
147 |                _ -> []
148 | 
149 | -- | Generates the error message displayed when somebody tries to let us
150 | --   derive impossible instances!
151 | notDeriveAbleErrorMsg :: Name -> Info -> String
152 | notDeriveAbleErrorMsg name info = "Could not derive LargeHashable instance for "
153 |     ++ (show name) ++ "(" ++ (show info) ++ "). If you think this should be possible, file an issue."
154 | 
155 | -- | After 'deriveLargeHashable' has matched all the important information
156 | --   this function gets called to build the instance declaration.
157 | #if MIN_VERSION_template_haskell(2,17,0)
158 | buildInstance :: Type -> Cxt -> [TyVarBndr f] -> [Con] ->  Q [Dec]
159 | #else
160 | buildInstance :: Type -> Cxt -> [TyVarBndr] -> [Con] ->  Q [Dec]
161 | #endif
162 | buildInstance basicType context vars cons =
163 |     let consWithIds = zip [0..] cons
164 |         constraints = makeConstraints context vars
165 |         typeWithVars = foldl appT (return basicType) $ map (varT . varName) vars
166 |       in (:[]) <$> instanceD constraints (conT ''LargeHashable `appT` typeWithVars)
167 |             [updateHashDeclaration 'updateHash consWithIds,
168 |              updateHashDeclaration 'updateHashStable consWithIds]
169 | 
170 | -- | This function generates the declaration for the 'updateHash' and the
171 | --   'updateHashStable functions
172 | --   of the 'LargeHashable' typeclass. By taking the constructors with there
173 | --   selected IDs and calling 'updateHashClause' for everyone of them to generate
174 | --   the corresponding clause.
175 | updateHashDeclaration :: Name -> [(Integer, Con)] -> Q Dec
176 | updateHashDeclaration name [(_, con)] =
177 |     funD name [updateHashClause name Nothing con]
178 | updateHashDeclaration name consWIds =
179 |     funD name (map (uncurry (updateHashClause name) . first Just) consWIds)
180 | 
181 | -- | 'updateHashClause' generates a clause of the 'updateHash' function.
182 | --   It makes sure all the fields are matched correctly and updates the hash
183 | --   with the neccessary information about the constructor (its ID) and all
184 | --   of its fields.
185 | updateHashClause :: Name -> Maybe Integer -> Con -> Q Clause
186 | updateHashClause name mI con =
187 |         clause [return patOfClause]
188 |             (normalB $
189 |                 foldl sequenceExps
190 |                     conMarker
191 |                     hashUpdatesOfConFields)
192 |             []
193 |     where conMarker = case mI of
194 |                         Just i -> [| updateHash ($(litE . IntegerL $ i) :: Word64) |]
195 |                         Nothing -> [| return () |]
196 |           hashUpdatesOfConFields = map (\pn -> [| $(varE name) $(varE pn) |]) patVarNames
197 |           -- Extract the names of all the
198 |           -- pattern variables from usedPat.
199 |           patVarNames = case patOfClause of
200 | #if MIN_VERSION_template_haskell(2,18,0)
201 |                           ConP _ _ vars -> map (\(VarP v) -> v) vars
202 | #else
203 |                           ConP _ vars -> map (\(VarP v) -> v) vars
204 | #endif
205 |                           InfixP (VarP v1) _ (VarP v2) -> [v1, v2]
206 |                           _ -> error "Pattern in patVarNames not matched!"
207 |           patOfClause = patternForCon con
208 | 
209 | -- | Generate a Pattern that matches the supplied constructor
210 | --   and all of its fields.
211 | patternForCon :: Con -> Pat
212 | patternForCon con = case con of
213 |               NormalC n types -> conP n $ uniqueVarPats (length types)
214 |               RecC n varTypes -> conP n $ uniqueVarPats (length varTypes)
215 |               InfixC _ n _ -> InfixP (VarP . mkName $ "x") n (VarP . mkName $ "y")
216 |               c@(ForallC{}) -> error $ "Cannot derive quantified type as it would potentially violate uniqueness: "++ show c
217 | #if MIN_VERSION_template_haskell(2,11,0)
218 |               GadtC [n] types _ -> conP n $ uniqueVarPats (length types)
219 |               RecGadtC [n] varTypes _ -> conP n $ uniqueVarPats (length varTypes)
220 |               _ -> error $ "Constructor not supported: "++show con
221 | #endif
222 |     where
223 |         uniqueVarPats n = take n . map (VarP . mkName) $ names
224 |         conP n =
225 | #if MIN_VERSION_template_haskell(2,18,0)
226 |             ConP n []
227 | #else
228 |             ConP n
229 | #endif
230 | 
231 | 
232 | -- | Sequences two Expressions using the '(>>)' operator.
233 | sequenceExps :: Q Exp -> Q Exp -> Q Exp
234 | sequenceExps first second = infixE (Just first) (varE '(>>)) (Just second)
235 | 
236 | -- | Generates the constraints needed for the declaration of
237 | --   the 'LargeHashable' class. This means that the constraint
238 | --   @LargeHashable $TypeVar$@ is added for every type variable
239 | --   the type has.
240 | #if MIN_VERSION_template_haskell(2,17,0)
241 | makeConstraints :: Cxt -> [TyVarBndr f] -> Q Cxt
242 | #else
243 | makeConstraints :: Cxt -> [TyVarBndr] -> Q Cxt
244 | #endif
245 | makeConstraints context vars = return $ context ++
246 |     map (\v -> (ConT (toLargeHashableClass v)) `AppT` (VarT . varName $ v)) vars
247 |     where
248 | #if MIN_VERSION_template_haskell(2,17,0)
249 |       toLargeHashableClass :: TyVarBndr f -> Name
250 |       toLargeHashableClass var =
251 |         case var of
252 |           (PlainTV _ _) -> ''LargeHashable
253 |           (KindedTV _ _ (AppT (AppT ArrowT StarT) StarT)) -> ''LargeHashable'
254 |           (KindedTV _ _ _) -> ''LargeHashable
255 | #else
256 |       toLargeHashableClass :: TyVarBndr -> Name
257 |       toLargeHashableClass var =
258 |         case var of
259 |           (PlainTV _) -> ''LargeHashable
260 |           (KindedTV _ (AppT (AppT ArrowT StarT) StarT)) -> ''LargeHashable'
261 |           (KindedTV _ _) -> ''LargeHashable
262 | #endif
263 | 
264 | -- | Returns the 'Name' for a type variable.
265 | #if MIN_VERSION_template_haskell(2,17,0)
266 | varName :: TyVarBndr f -> Name
267 | varName (PlainTV n _) = n
268 | varName (KindedTV n _ _) = n
269 | #else
270 | varName :: TyVarBndr -> Name
271 | varName (PlainTV n) = n
272 | varName (KindedTV n _) = n
273 | #endif
274 | 
275 | -- | An infinite list of unique names that
276 | --   are used in the generations of patterns.
277 | names :: [String]
278 | names = concat $ map (gen (map (:[]) ['a'..'z'])) [0..]
279 |   where gen :: [String] -> Integer -> [String]
280 |         gen acc 0 = acc
281 |         gen acc n = gen (concat $ map (\q -> map (\c -> c : q) ['a'..'z']) acc) (n - 1)
282 | 


--------------------------------------------------------------------------------
/stack-ghc-9.2.yaml:
--------------------------------------------------------------------------------
 1 | resolver: lts-20.21
 2 | require-stack-version: ">= 1.0.0"
 3 | 
 4 | packages:
 5 | - '.'
 6 | 
 7 | flags: {}
 8 | 
 9 | extra-deps:
10 |   - crypton-1.0.1@sha256:f41316fbc6ad878396e476355e27b70ac35c344d74e3eefafe709e03b192be9e,14527
11 | 


--------------------------------------------------------------------------------
/stack-ghc-9.2.yaml.lock:
--------------------------------------------------------------------------------
 1 | # This file was autogenerated by Stack.
 2 | # You should not edit this file by hand.
 3 | # For more information, please see the documentation at:
 4 | #   https://docs.haskellstack.org/en/stable/lock_files
 5 | 
 6 | packages:
 7 | - completed:
 8 |     hackage: crypton-1.0.1@sha256:f41316fbc6ad878396e476355e27b70ac35c344d74e3eefafe709e03b192be9e,14527
 9 |     pantry-tree:
10 |       sha256: b8f0a33755a0871d325300024592cd9306e393941248184e9c4faf417d4adfcd
11 |       size: 23276
12 |   original:
13 |     hackage: crypton-1.0.1@sha256:f41316fbc6ad878396e476355e27b70ac35c344d74e3eefafe709e03b192be9e,14527
14 | snapshots:
15 | - completed:
16 |     sha256: 401a0e813162ba62f04517f60c7d25e93a0f867f94a902421ebf07d1fb5a8c46
17 |     size: 650044
18 |     url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/20/21.yaml
19 |   original: lts-20.21
20 | 


--------------------------------------------------------------------------------
/stack-ghc-9.4.yaml:
--------------------------------------------------------------------------------
1 | resolver: lts-21.25
2 | require-stack-version: ">= 1.0.0"
3 | 
4 | packages:
5 | - '.'
6 | 
7 | flags: {}
8 | 


--------------------------------------------------------------------------------
/stack-ghc-9.4.yaml.lock:
--------------------------------------------------------------------------------
 1 | # This file was autogenerated by Stack.
 2 | # You should not edit this file by hand.
 3 | # For more information, please see the documentation at:
 4 | #   https://docs.haskellstack.org/en/stable/lock_files
 5 | 
 6 | packages: []
 7 | snapshots:
 8 | - completed:
 9 |     sha256: 0a8278a8c9f33fbdaa40167e035065a7ca8ed61d89fec21236b888c470dab3f0
10 |     size: 625218
11 |     url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/nightly/2023/5/8.yaml
12 |   original: nightly-2023-05-08
13 | 


--------------------------------------------------------------------------------
/stack-ghc-9.6.yaml:
--------------------------------------------------------------------------------
1 | resolver: lts-22.34
2 | require-stack-version: ">= 1.0.0"
3 | 
4 | packages:
5 | - '.'
6 | 
7 | flags: {}
8 | 


--------------------------------------------------------------------------------
/stack-ghc-9.6.yaml.lock:
--------------------------------------------------------------------------------
 1 | # This file was autogenerated by Stack.
 2 | # You should not edit this file by hand.
 3 | # For more information, please see the documentation at:
 4 | #   https://docs.haskellstack.org/en/stable/lock_files
 5 | 
 6 | packages: []
 7 | snapshots:
 8 | - completed:
 9 |     sha256: edbd50d7e7c85c13ad5f5835ae2db92fab1e9cf05ecf85340e2622ec0a303df1
10 |     size: 720020
11 |     url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/22/34.yaml
12 |   original: lts-22.34
13 | 


--------------------------------------------------------------------------------
/stack-ghc-9.8.yaml:
--------------------------------------------------------------------------------
1 | resolver: lts-23.4
2 | require-stack-version: ">= 1.0.0"
3 | 
4 | packages:
5 | - '.'
6 | 
7 | flags: {}
8 | 


--------------------------------------------------------------------------------
/stack-ghc-9.8.yaml.lock:
--------------------------------------------------------------------------------
 1 | # This file was autogenerated by Stack.
 2 | # You should not edit this file by hand.
 3 | # For more information, please see the documentation at:
 4 | #   https://docs.haskellstack.org/en/stable/lock_files
 5 | 
 6 | packages: []
 7 | snapshots:
 8 | - completed:
 9 |     sha256: 0d61fd2be255f5c425cd92dbb4a78d1f70af2c138f3ec921e98b97ae182b044c
10 |     size: 679291
11 |     url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/23/4.yaml
12 |   original: lts-23.4
13 | 


--------------------------------------------------------------------------------
/stack.yaml:
--------------------------------------------------------------------------------
1 | stack-ghc-9.6.yaml


--------------------------------------------------------------------------------
/stack.yaml.lock:
--------------------------------------------------------------------------------
 1 | # This file was autogenerated by Stack.
 2 | # You should not edit this file by hand.
 3 | # For more information, please see the documentation at:
 4 | #   https://docs.haskellstack.org/en/stable/lock_files
 5 | 
 6 | packages: []
 7 | snapshots:
 8 | - completed:
 9 |     sha256: edbd50d7e7c85c13ad5f5835ae2db92fab1e9cf05ecf85340e2622ec0a303df1
10 |     size: 720020
11 |     url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/22/34.yaml
12 |   original: lts-22.34
13 | 


--------------------------------------------------------------------------------
/test/Data/LargeHashable/Tests/Class.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE OverloadedStrings #-}
  2 | {-# OPTIONS_GHC -F -pgmF htfpp #-}
  3 | module Data.LargeHashable.Tests.Class where
  4 | 
  5 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines")
  6 | import Data.Fixed
  7 | import Data.Hashable
  8 | import Data.Int
  9 | import Data.LargeHashable
 10 | import Data.LargeHashable.Tests.Helper
 11 | import Data.Map (Map ())
 12 | import Data.Ratio
 13 | import Data.Set (Set ())
 14 | import Data.Time.Calendar
 15 | import Data.Time.Clock
 16 | import Data.Time.Clock.TAI
 17 | import Data.Time.LocalTime
 18 | import Data.Word
 19 | import Test.Framework hiding (Fixed (..))
 20 | import qualified Data.Aeson as J
 21 | import qualified Data.ByteString as B
 22 | import qualified Data.ByteString.Lazy as BL
 23 | import qualified Data.ByteString.Short as BS
 24 | import qualified Data.HashMap.Lazy as HML
 25 | import qualified Data.HashMap.Strict as HM
 26 | import qualified Data.HashSet as HS
 27 | import qualified Data.IntMap as IM
 28 | import qualified Data.IntSet as IS
 29 | import qualified Data.Map as Map
 30 | import qualified Data.Scientific as Sci
 31 | import qualified Data.Sequence as Seq
 32 | import qualified Data.Set as Set
 33 | import qualified Data.Strict.Tuple as Tuple
 34 | import qualified Data.Text as T
 35 | import qualified Data.Text.Lazy as TL
 36 | import qualified Data.Vector as V
 37 | 
 38 | -- of course we can't fully prove uniqueness using
 39 | -- properties and there is a small chance of collisions
 40 | generic_uniquenessProp :: (Eq a, LargeHashable a)  => a -> a -> Bool
 41 | generic_uniquenessProp a b = (a == b) == (largeHash md5HashAlgorithm a == largeHash md5HashAlgorithm b)
 42 | 
 43 | prop_integerUniqueness :: Integer -> Integer -> Bool
 44 | prop_integerUniqueness = generic_uniquenessProp
 45 | 
 46 | test_hashInteger :: IO ()
 47 | test_hashInteger =
 48 |     do subAssert $ assertHashes 0
 49 |        subAssert $ assertHashes (toInteger (maxBound::Int))
 50 |        subAssert $ assertHashes (toInteger (minBound::Int))
 51 |     where
 52 |       assertHashes :: Integer -> IO ()
 53 |       assertHashes n =
 54 |           do let list = [(n-100)..(n+100)]
 55 |                  hashes = map (largeHash md5HashAlgorithm) list
 56 |                  hashesSet = Set.fromList hashes
 57 |              assertEqual (length list) (Set.size hashesSet)
 58 | 
 59 | test_hashBool :: IO ()
 60 | test_hashBool =
 61 |     assertBool (largeHash md5HashAlgorithm True /= largeHash md5HashAlgorithm False)
 62 | 
 63 | prop_genericUniqueness :: TestA -> TestA -> Bool
 64 | prop_genericUniqueness = generic_uniquenessProp
 65 | 
 66 | prop_intUniqueness :: Int -> Int -> Bool
 67 | prop_intUniqueness = generic_uniquenessProp
 68 | 
 69 | prop_int8Uniqueness :: Int8 -> Int8 -> Bool
 70 | prop_int8Uniqueness = generic_uniquenessProp
 71 | 
 72 | prop_int16Uniqueness :: Int16 -> Int16 -> Bool
 73 | prop_int16Uniqueness = generic_uniquenessProp
 74 | 
 75 | prop_int32Uniqueness :: Int32 -> Int32 -> Bool
 76 | prop_int32Uniqueness = generic_uniquenessProp
 77 | 
 78 | prop_int64Uniqueness :: Int64 -> Int64 -> Bool
 79 | prop_int64Uniqueness = generic_uniquenessProp
 80 | 
 81 | prop_wordUniqueness :: Word -> Word -> Bool
 82 | prop_wordUniqueness = generic_uniquenessProp
 83 | 
 84 | prop_word8Uniqueness :: Word8 -> Word8 -> Bool
 85 | prop_word8Uniqueness = generic_uniquenessProp
 86 | 
 87 | prop_word16Uniqueness :: Word16 -> Word16 -> Bool
 88 | prop_word16Uniqueness = generic_uniquenessProp
 89 | 
 90 | prop_word32Uniqueness :: Word32 -> Word32 -> Bool
 91 | prop_word32Uniqueness = generic_uniquenessProp
 92 | 
 93 | prop_word64Uniqueness :: Word64 -> Word64 -> Bool
 94 | prop_word64Uniqueness = generic_uniquenessProp
 95 | 
 96 | prop_charUniqueness :: Char -> Char -> Bool
 97 | prop_charUniqueness = generic_uniquenessProp
 98 | 
 99 | prop_bytestringUniqueness :: B.ByteString -> B.ByteString -> Bool
100 | prop_bytestringUniqueness = generic_uniquenessProp
101 | 
102 | prop_appendByteStringOk :: B.ByteString -> B.ByteString -> Bool
103 | prop_appendByteStringOk b1 b2 =
104 |     runMD5 (updateHash (b1 `B.append` b2)) /=
105 |     runMD5 (updateHash b1 >> updateHash b2)
106 | 
107 | test_irrelevantByteStringChunking :: IO ()
108 | test_irrelevantByteStringChunking = do
109 |     assertEqual (largeHash md5HashAlgorithm (BL.fromChunks ["foo", "ba", "r"]))
110 |                 (largeHash md5HashAlgorithm (BL.fromChunks ["foob", "ar"]))
111 | 
112 | prop_lazyBytestringUniqueness :: BL.ByteString -> BL.ByteString -> Bool
113 | prop_lazyBytestringUniqueness = generic_uniquenessProp
114 | 
115 | prop_appendLazyByteStringOk :: BL.ByteString -> BL.ByteString -> Bool
116 | prop_appendLazyByteStringOk b1 b2 =
117 |     runMD5 (updateHash (b1 `BL.append` b2)) /=
118 |     runMD5 (updateHash b1 >> updateHash b2)
119 | 
120 | prop_shortBytestringUniqueness :: BS.ShortByteString -> BS.ShortByteString -> Bool
121 | prop_shortBytestringUniqueness = generic_uniquenessProp
122 | 
123 | prop_textUniqueness :: T.Text -> T.Text -> Bool
124 | prop_textUniqueness = generic_uniquenessProp
125 | 
126 | prop_appendTextOk :: T.Text -> T.Text -> Bool
127 | prop_appendTextOk t1 t2 =
128 |     runMD5 (updateHash (t1 `T.append` t2)) /=
129 |     runMD5 (updateHash t1 >> updateHash t2)
130 | 
131 | test_irrelevantTextChunking :: IO ()
132 | test_irrelevantTextChunking = do
133 |     assertEqual (largeHash md5HashAlgorithm (TL.fromChunks ["don't", " pa", "nic"]))
134 |                 (largeHash md5HashAlgorithm (TL.fromChunks ["don", "'t p", "an", "ic"]))
135 | 
136 | prop_lazyTextUniqueness :: TL.Text -> TL.Text -> Bool
137 | prop_lazyTextUniqueness = generic_uniquenessProp
138 | 
139 | prop_appendLazyTextOk :: TL.Text -> TL.Text -> Bool
140 | prop_appendLazyTextOk t1 t2 =
141 |     runMD5 (updateHash (t1 `TL.append` t2)) /=
142 |     runMD5 (updateHash t1 >> updateHash t2)
143 | 
144 | prop_lazyStrictTextSame :: [T.Text] -> Bool
145 | prop_lazyStrictTextSame chunks =
146 |     let lazy = TL.fromChunks chunks
147 |         strict = mconcat chunks
148 |     in runMD5 (updateHash lazy) == runMD5 (updateHash strict)
149 | 
150 | prop_listUniqueness :: [Bool] -> [Bool] -> Bool
151 | prop_listUniqueness = generic_uniquenessProp
152 | 
153 | prop_appendListOk :: [Int] -> [Int] -> Bool
154 | prop_appendListOk l1 l2 =
155 |     runMD5 (updateHash (l1 ++ l2)) /=
156 |     runMD5 (updateHash l1 >> updateHash l2)
157 | 
158 | prop_DoubleUniqueness :: Double -> Double -> Bool
159 | prop_DoubleUniqueness = generic_uniquenessProp
160 | 
161 | prop_FloatUniqueness :: Float -> Float -> Bool
162 | prop_FloatUniqueness = generic_uniquenessProp
163 | 
164 | prop_setUniqueness :: Set Int -> Set Int -> Bool
165 | prop_setUniqueness = generic_uniquenessProp
166 | 
167 | test_hashSet :: IO ()
168 | test_hashSet =
169 |     do let s1 = Set.fromList [1::Int, 2, 3]
170 |            s2 = Set.fromList [3::Int, 2, 1]
171 |            s3 = Set.fromList [2::Int, 1, 3]
172 |        assertEqual (largeHash md5HashAlgorithm s1) (largeHash md5HashAlgorithm s2)
173 |        assertEqual (largeHash md5HashAlgorithm s1) (largeHash md5HashAlgorithm s3)
174 |        assertEqual (largeHash md5HashAlgorithm s2) (largeHash md5HashAlgorithm s3)
175 | 
176 | prop_unionSetOk :: Set Int -> Set Int -> Bool
177 | prop_unionSetOk s1 s2 =
178 |     runMD5 (updateHash (s1 `Set.union` s2)) /= runMD5 (updateHash s1 >> updateHash s2)
179 | 
180 | prop_intSetUniqueness :: IS.IntSet -> IS.IntSet -> Bool
181 | prop_intSetUniqueness = generic_uniquenessProp
182 | 
183 | test_hashIntSet :: IO ()
184 | test_hashIntSet =
185 |     do let s1 = IS.fromList [1::Int, 2, 3]
186 |            s2 = IS.fromList [3::Int, 2, 1]
187 |            s3 = IS.fromList [2::Int, 1, 3]
188 |        assertEqual (largeHash md5HashAlgorithm s1) (largeHash md5HashAlgorithm s2)
189 |        assertEqual (largeHash md5HashAlgorithm s1) (largeHash md5HashAlgorithm s3)
190 |        assertEqual (largeHash md5HashAlgorithm s2) (largeHash md5HashAlgorithm s3)
191 | 
192 | prop_unionIntSetOk :: IS.IntSet -> IS.IntSet -> Bool
193 | prop_unionIntSetOk s1 s2 =
194 |     runMD5 (updateHash (s1 `IS.union` s2)) /=
195 |     runMD5 (updateHash s1 >> updateHash s2)
196 | 
197 | prop_hashSetUniqueness :: HS.HashSet Int -> HS.HashSet Int -> Bool
198 | prop_hashSetUniqueness = generic_uniquenessProp
199 | 
200 | data ConstHash
201 |     = ConstHash Int
202 |       deriving (Eq)
203 | 
204 | instance Hashable ConstHash where
205 |     hashWithSalt _ _ = 0
206 | 
207 | instance LargeHashable ConstHash where
208 |     updateHash (ConstHash i) = updateHash i
209 |     updateHashStable (ConstHash i) = updateHashStable i
210 | 
211 | test_hashHashSet :: IO ()
212 | test_hashHashSet =
213 |     do let s1 = HS.fromList [ConstHash 1, ConstHash 2, ConstHash 3]
214 |            s2 = HS.fromList [ConstHash 3, ConstHash 2, ConstHash 1]
215 |            s3 = HS.fromList [ConstHash 2, ConstHash 1, ConstHash 3]
216 |        assertEqual (largeHash md5HashAlgorithm s1) (largeHash md5HashAlgorithm s2)
217 |        assertEqual (largeHash md5HashAlgorithm s1) (largeHash md5HashAlgorithm s3)
218 |        assertEqual (largeHash md5HashAlgorithm s2) (largeHash md5HashAlgorithm s3)
219 | 
220 | prop_unionHashSetOk :: HS.HashSet Int -> HS.HashSet Int -> Bool
221 | prop_unionHashSetOk s1 s2 =
222 |     runMD5 (updateHash (s1 `HS.union` s2)) /=
223 |     runMD5 (updateHash s1 >> updateHash s2)
224 | 
225 | prop_mapUniqueness :: Map Int String -> Map Int String -> Bool
226 | prop_mapUniqueness = generic_uniquenessProp
227 | 
228 | test_hashMap :: IO ()
229 | test_hashMap =
230 |     do let m1 = Map.fromList [(1::Int, "1"::String), (2, "2"), (3, "3")]
231 |            m2 = Set.fromList [(3::Int, "3"::String), (2, "2"), (1, "1")]
232 |            m3 = Set.fromList [(2::Int, "2"::String), (1, "1"), (3, "3")]
233 |        assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m2)
234 |        assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m3)
235 |        assertEqual (largeHash md5HashAlgorithm m2) (largeHash md5HashAlgorithm m3)
236 | 
237 | prop_unionMapOk :: Map Int Bool -> Map Int Bool -> Bool
238 | prop_unionMapOk m1 m2 =
239 |     runMD5 (updateHash (m1 `Map.union` m2)) /= runMD5 (updateHash m1 >> updateHash m2)
240 | 
241 | prop_intMapUniqueness :: IM.IntMap String -> IM.IntMap String -> Bool
242 | prop_intMapUniqueness = generic_uniquenessProp
243 | 
244 | test_hashIntMap :: IO ()
245 | test_hashIntMap =
246 |     do let m1 = IM.fromList [(1::Int, "1"::String), (2, "2"), (3, "3")]
247 |            m2 = IM.fromList [(3::Int, "3"::String), (2, "2"), (1, "1")]
248 |            m3 = IM.fromList [(2::Int, "2"::String), (1, "1"), (3, "3")]
249 |        assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m2)
250 |        assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m3)
251 |        assertEqual (largeHash md5HashAlgorithm m2) (largeHash md5HashAlgorithm m3)
252 | 
253 | prop_unionIntMapOk :: IM.IntMap Bool -> IM.IntMap Bool -> Bool
254 | prop_unionIntMapOk m1 m2 =
255 |     runMD5 (updateHash (m1 `IM.union` m2)) /= runMD5 (updateHash m1 >> updateHash m2)
256 | 
257 | prop_hashMapUniqueness :: HM.HashMap Int Bool -> HM.HashMap Int Bool -> Bool
258 | prop_hashMapUniqueness = generic_uniquenessProp
259 | 
260 | test_hashHashMap :: IO ()
261 | test_hashHashMap =
262 |     do let val = "foo" :: String
263 |            m1 = HM.fromList [(ConstHash 1, val), (ConstHash 2, val), (ConstHash 3, val)]
264 |            m2 = HM.fromList [(ConstHash 3, val), (ConstHash 2, val), (ConstHash 1, val)]
265 |            m3 = HM.fromList [(ConstHash 2, val), (ConstHash 1, val), (ConstHash 3, val)]
266 |        assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m2)
267 |        assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m3)
268 |        assertEqual (largeHash md5HashAlgorithm m2) (largeHash md5HashAlgorithm m3)
269 | 
270 | prop_unionHashMapOk :: HM.HashMap Int Bool -> HM.HashMap Int Bool -> Bool
271 | prop_unionHashMapOk m1 m2 =
272 |     runMD5 (updateHash (m1 `HM.union` m2)) /= runMD5 (updateHash m1 >> updateHash m2)
273 | 
274 | prop_hashMapLazyUniqueness :: HML.HashMap Int Bool -> HML.HashMap Int Bool -> Bool
275 | prop_hashMapLazyUniqueness = generic_uniquenessProp
276 | 
277 | test_hashHashMapLazy :: IO ()
278 | test_hashHashMapLazy =
279 |     do let val = "foo" :: String
280 |            m1 = HML.fromList [(ConstHash 1, val), (ConstHash 2, val), (ConstHash 3, val)]
281 |            m2 = HML.fromList [(ConstHash 3, val), (ConstHash 2, val), (ConstHash 1, val)]
282 |            m3 = HML.fromList [(ConstHash 2, val), (ConstHash 1, val), (ConstHash 3, val)]
283 |        assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m2)
284 |        assertEqual (largeHash md5HashAlgorithm m1) (largeHash md5HashAlgorithm m3)
285 |        assertEqual (largeHash md5HashAlgorithm m2) (largeHash md5HashAlgorithm m3)
286 | 
287 | prop_hashPairUniqueness :: (Int, Int) -> (Int, Int) -> Bool
288 | prop_hashPairUniqueness = generic_uniquenessProp
289 | 
290 | prop_hashTripleUniqueness :: (Int, Int, Int) -> (Int, Int, Int) -> Bool
291 | prop_hashTripleUniqueness = generic_uniquenessProp
292 | 
293 | prop_hashQuadrupleUniqueness :: (Int, Int, Int, Int) -> (Int, Int, Int, Int) -> Bool
294 | prop_hashQuadrupleUniqueness = generic_uniquenessProp
295 | 
296 | prop_hashQuintupleUniqueness :: (Int, Int, Int, Int, Int) -> (Int, Int, Int, Int, Int) -> Bool
297 | prop_hashQuintupleUniqueness = generic_uniquenessProp
298 | 
299 | prop_hashStrictPairUniqueness :: Tuple.Pair Int Int -> Tuple.Pair Int Int -> Bool
300 | prop_hashStrictPairUniqueness = generic_uniquenessProp
301 | 
302 | prop_hashMaybeUniqueness :: Maybe Int -> Maybe Int -> Bool
303 | prop_hashMaybeUniqueness = generic_uniquenessProp
304 | 
305 | prop_hashEitherUniqueness :: Either Int Bool -> Either Int Bool -> Bool
306 | prop_hashEitherUniqueness = generic_uniquenessProp
307 | 
308 | prop_hashOrderingUniqueness :: Ordering -> Ordering -> Bool
309 | prop_hashOrderingUniqueness = generic_uniquenessProp
310 | 
311 | prop_ratioUniqueness :: Ratio Int -> Ratio Int -> Bool
312 | prop_ratioUniqueness = generic_uniquenessProp
313 | 
314 | test_ratio :: IO ()
315 | test_ratio =
316 |     do let r1 = (1::Int) % 2
317 |            r2 = (2::Int) % 4
318 |        assertEqual (largeHash md5HashAlgorithm r1) (largeHash md5HashAlgorithm r2)
319 | 
320 | prop_dayUniqueness :: Day -> Day -> Bool
321 | prop_dayUniqueness = generic_uniquenessProp
322 | 
323 | prop_diffTimeUniqueness :: DiffTime -> DiffTime -> Bool
324 | prop_diffTimeUniqueness = generic_uniquenessProp
325 | 
326 | prop_utcTimeUniqueness :: UTCTime -> UTCTime -> Bool
327 | prop_utcTimeUniqueness = generic_uniquenessProp
328 | 
329 | prop_absoluteTimeUniqueness :: AbsoluteTime -> AbsoluteTime -> Bool
330 | prop_absoluteTimeUniqueness = generic_uniquenessProp
331 | 
332 | prop_nominalDiffTimeUniqueness :: NominalDiffTime -> NominalDiffTime -> Bool
333 | prop_nominalDiffTimeUniqueness = generic_uniquenessProp
334 | 
335 | prop_timeZoneUniqueness :: TimeZone -> TimeZone -> Bool
336 | prop_timeZoneUniqueness = generic_uniquenessProp
337 | 
338 | prop_timeOfDayUniqueness :: TimeOfDay -> TimeOfDay -> Bool
339 | prop_timeOfDayUniqueness = generic_uniquenessProp
340 | 
341 | prop_localTimeUniqueness :: LocalTime -> LocalTime -> Bool
342 | prop_localTimeUniqueness = generic_uniquenessProp
343 | 
344 | prop_fixedUniqueness :: Fixed E1 -> Fixed E1 -> Bool
345 | prop_fixedUniqueness = generic_uniquenessProp
346 | 
347 | prop_scientificUniqueness :: Sci.Scientific -> Sci.Scientific -> Bool
348 | prop_scientificUniqueness = generic_uniquenessProp
349 | 
350 | prop_aesonValueUniqueness :: J.Value -> J.Value -> Bool
351 | prop_aesonValueUniqueness = generic_uniquenessProp
352 | 
353 | prop_vectorUniqueness :: V.Vector Int -> V.Vector Int -> Bool
354 | prop_vectorUniqueness = generic_uniquenessProp
355 | 
356 | prop_appendVectorOk :: V.Vector Int -> V.Vector Int -> Bool
357 | prop_appendVectorOk v1 v2 =
358 |     runMD5 (updateHash (v1 V.++ v2)) /=
359 |     runMD5 (updateHash v1 >> updateHash v2)
360 | 
361 | prop_seqUniqueness :: Seq.Seq Int -> Seq.Seq Int -> Bool
362 | prop_seqUniqueness = generic_uniquenessProp
363 | 
364 | prop_appendSeqOk :: Seq.Seq Int -> Seq.Seq Int -> Bool
365 | prop_appendSeqOk s1 s2 =
366 |     runMD5 (updateHash (s1 Seq.>< s2)) /=
367 |     runMD5 (updateHash s1 >> updateHash s2)
368 | 
369 | -- regression test for #25
370 | test_textHash :: IO ()
371 | test_textHash = do
372 |   let t1 = T.pack "abcdefgh"
373 |       t2 = T.pack "abcdxxxx"
374 |   assertEqual
375 |     (largeHash md5HashAlgorithm (T.take 4 t1))
376 |     (largeHash md5HashAlgorithm (T.take 4 t2))
377 |   assertNotEqual
378 |     (largeHash md5HashAlgorithm (T.take 5 t1))
379 |     (largeHash md5HashAlgorithm (T.take 5 t2))
380 | 


--------------------------------------------------------------------------------
/test/Data/LargeHashable/Tests/Helper.hs:
--------------------------------------------------------------------------------
  1 | {-# OPTIONS_GHC -fno-warn-orphans #-}
  2 | {-# LANGUAGE CPP           #-}
  3 | {-# LANGUAGE DeriveGeneric #-}
  4 | {-# LANGUAGE OverloadedStrings #-}
  5 | module Data.LargeHashable.Tests.Helper where
  6 | 
  7 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines")
  8 | import Control.Monad
  9 | import Data.Bytes.Serial
 10 | import Data.Hashable
 11 | import Data.LargeHashable
 12 | import Data.Time.Calendar
 13 | import Data.Time.Clock
 14 | import Data.Time.Clock.TAI
 15 | import Data.Time.LocalTime
 16 | import GHC.Generics
 17 | import Test.QuickCheck
 18 | import qualified Data.ByteString as B
 19 | import qualified Data.ByteString.Lazy as BL
 20 | import qualified Data.ByteString.Short as BS
 21 | import qualified Data.HashMap.Lazy as HML
 22 | import qualified Data.HashSet as HS
 23 | import qualified Data.Scientific as Sci
 24 | import qualified Data.Strict.Tuple as Tuple
 25 | import qualified Data.Text as T
 26 | import qualified Data.Text.Lazy as TL
 27 | import qualified Data.Vector as V
 28 | 
 29 | #if !MIN_VERSION_QuickCheck(2,8,2)
 30 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines")
 31 | import qualified Data.IntMap as IM
 32 | import qualified Data.IntSet as IS
 33 | import qualified Data.Map as M
 34 | import qualified Data.Set as S
 35 | #endif
 36 | 
 37 | instance Arbitrary T.Text where
 38 |     arbitrary = fmap T.pack arbitrary
 39 |     shrink = map T.pack . shrink . T.unpack
 40 | 
 41 | instance Arbitrary B.ByteString where
 42 |     arbitrary = fmap B.pack arbitrary
 43 |     shrink = map B.pack . shrink . B.unpack
 44 | 
 45 | instance Arbitrary BL.ByteString where
 46 |     arbitrary = fmap BL.fromChunks arbitrary
 47 |     shrink = map BL.fromChunks . shrink . BL.toChunks
 48 | 
 49 | instance Arbitrary TL.Text where
 50 |     arbitrary = fmap TL.fromChunks arbitrary
 51 |     shrink = map TL.fromChunks . shrink . TL.toChunks
 52 | 
 53 | instance Arbitrary a => Arbitrary (V.Vector a) where
 54 |     arbitrary = liftM V.fromList arbitrary
 55 |     shrink = map V.fromList . shrink . V.toList
 56 | 
 57 | instance Arbitrary Sci.Scientific where
 58 |     arbitrary =
 59 |         liftM2 Sci.scientific arbitrary arbitrary
 60 |     shrink = shrinkRealFrac
 61 | 
 62 | #if !MIN_VERSION_aeson(2,0,0)
 63 | instance Arbitrary J.Value where
 64 |     arbitrary = sized arbitraryJsonValue
 65 |         where
 66 |           arbitraryJsonValue n =
 67 |               if n <= 0
 68 |               then oneof simpleGens
 69 |               else frequency $
 70 |                        (map (\g -> (1, g)) simpleGens) ++
 71 |                        [(3, arbitraryObject (n `div` 2))
 72 |                        ,(3, arbitraryArray (n `div` 2))]
 73 |           arbitraryObject n =
 74 |               do l <- arbitraryListOfJsonValues n
 75 |                  elems <- forM l $ \v ->
 76 |                      do k <-
 77 |                             elements
 78 |                                 ["key", "foo", "bar", "baz", "spam", "egg", "chicken", "dog"
 79 |                                 ,"any", "what", "santa", "mark", "wardrobe", "baseball"]
 80 |                         return (k, v)
 81 |                  return (J.Object (HM.fromList elems))
 82 |           arbitraryArray n =
 83 |               do l <- arbitraryListOfJsonValues n
 84 |                  return (J.Array (V.fromList l))
 85 |           arbitraryListOfJsonValues n =
 86 |               do size <- elements [(0::Int)..10]
 87 |                  forM [1..(size::Int)] $ \_ -> arbitraryJsonValue n
 88 |           simpleGens =
 89 |             [liftM J.String arbitrary
 90 |             ,liftM J.Number arbitrary
 91 |             ,liftM J.Bool arbitrary
 92 |             ,return J.Null]
 93 |     shrink v =
 94 |         case v of
 95 |           J.Object obj ->
 96 |               map J.Object (shrink obj)
 97 |           J.Array arr ->
 98 |               map J.Array (shrink arr)
 99 |           J.String t ->
100 |               map J.String (shrink t)
101 |           J.Number n ->
102 |               map J.Number (shrink n)
103 |           J.Bool _ -> []
104 |           J.Null -> []
105 | #endif
106 | 
107 | instance (Eq k, Hashable k, Arbitrary k, Arbitrary a) => Arbitrary (HML.HashMap k a) where
108 |     arbitrary = fmap HML.fromList arbitrary
109 |     shrink = map HML.fromList . shrink . HML.toList
110 | 
111 | instance (Eq a, Hashable a, Arbitrary a) => Arbitrary (HS.HashSet a) where
112 |     arbitrary = fmap HS.fromList arbitrary
113 |     shrink = map HS.fromList . shrink . HS.toList
114 | 
115 | #if !MIN_VERSION_QuickCheck(2,8,2)
116 | instance (Ord k, Arbitrary k, Arbitrary a) => Arbitrary (M.Map k a) where
117 |     arbitrary = fmap M.fromList arbitrary
118 |     shrink = map M.fromList . shrink . M.toList
119 | 
120 | instance Arbitrary a => Arbitrary (IM.IntMap a) where
121 |     arbitrary = fmap IM.fromList arbitrary
122 |     shrink = map IM.fromList . shrink . IM.toList
123 | 
124 | instance (Ord a, Arbitrary a) => Arbitrary (S.Set a) where
125 |     arbitrary = fmap S.fromList arbitrary
126 |     shrink = map S.fromList . shrink . S.toList
127 | 
128 | instance Arbitrary IS.IntSet where
129 |     arbitrary = fmap IS.fromList arbitrary
130 |     shrink = map IS.fromList . shrink . IS.toList
131 | 
132 | #endif
133 | 
134 | data TestA
135 |     = TestA
136 |     { age :: Int
137 |     , wealth :: Integer
138 |     , isStudent :: Bool
139 |     , name :: T.Text
140 |     , initials :: [Char]
141 |     , legLength :: (Int, Int)
142 |     } deriving (Generic, Eq, Show)
143 | 
144 | instance LargeHashable TestA
145 | instance Serial TestA
146 | 
147 | instance Arbitrary TestA where
148 |     arbitrary =
149 |         TestA <$> arbitrary <*> arbitrary <*> arbitrary <*>
150 |               arbitrary <*> arbitrary <*> arbitrary
151 | 
152 | instance Arbitrary BS.ShortByteString where
153 |     arbitrary = liftM BS.pack arbitrary
154 |     shrink x =
155 |         map BS.pack (shrink (BS.unpack x))
156 | 
157 | instance Arbitrary Day where
158 |     arbitrary =
159 |         do n <- arbitrary
160 |            return (ModifiedJulianDay (n `mod` 1000))
161 |     shrink (ModifiedJulianDay n) =
162 |         map ModifiedJulianDay (shrink n)
163 | 
164 | instance Arbitrary DiffTime where
165 |     arbitrary =
166 |         liftM picosecondsToDiffTime arbitrary
167 |     shrink delta =
168 |         let n = toRational delta
169 |         in map fromRational (shrink n)
170 | 
171 | instance Arbitrary UTCTime where
172 |     arbitrary =
173 |         do day <- arbitrary
174 |            picos <- arbitrary
175 |            return (UTCTime day (picosecondsToDiffTime (picos `mod` 86401)))
176 | 
177 | instance Arbitrary AbsoluteTime where
178 |     arbitrary =
179 |         do diff <- arbitrary
180 |            return (addAbsoluteTime diff taiEpoch)
181 | 
182 | instance Arbitrary NominalDiffTime where
183 |     arbitrary =
184 |         do r <- arbitrary
185 |            return (fromRational r)
186 | 
187 | instance Arbitrary TimeZone where
188 |     arbitrary =
189 |         do n <- arbitrary
190 |            s <- arbitrary
191 |            b <- arbitrary
192 |            return (TimeZone (n `mod` (60*24)) b s)
193 | 
194 | instance Arbitrary TimeOfDay where
195 |     arbitrary =
196 |         do h <- arbitrary
197 |            m <- arbitrary
198 |            s <- arbitrary
199 |            return (TimeOfDay (h `mod` 24) (m `mod` 60) (fromInteger (s `mod` 61)))
200 | 
201 | instance Arbitrary LocalTime where
202 |     arbitrary =
203 |         do d <- arbitrary
204 |            t <- arbitrary
205 |            return (LocalTime d t)
206 | 
207 | instance Arbitrary ZonedTime where
208 |     arbitrary =
209 |         do t <- arbitrary
210 |            z <- arbitrary
211 |            return (ZonedTime t z)
212 | 
213 | instance Arbitrary UniversalTime where
214 |     arbitrary =
215 |         do r <- arbitrary
216 |            return (ModJulianDate r)
217 | 
218 | instance (Arbitrary a, Arbitrary b) => Arbitrary (Tuple.Pair a b) where
219 |     arbitrary =
220 |         do x <- arbitrary
221 |            y <- arbitrary
222 |            return (x Tuple.:!: y)
223 | 
224 | instance Arbitrary Word128 where
225 |     arbitrary = Word128 <$> arbitrary <*> arbitrary
226 | 
227 | instance Arbitrary Word256 where
228 |     arbitrary = Word256 <$> arbitrary <*> arbitrary
229 | 


--------------------------------------------------------------------------------
/test/Data/LargeHashable/Tests/Inspection.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE TemplateHaskell   #-}
  2 | {-# LANGUAGE DeriveGeneric     #-}
  3 | {-# OPTIONS_GHC -F -pgmF htfpp #-}
  4 | {-# OPTIONS_GHC -O -fplugin Test.Inspection.Plugin #-}
  5 | module Data.LargeHashable.Tests.Inspection where
  6 | 
  7 | import Test.Framework hiding ((===), Failure, Success)
  8 | import Test.Inspection
  9 | 
 10 | import Data.LargeHashable
 11 | import Data.LargeHashable.Tests.Helper
 12 | import Data.LargeHashable.Class
 13 | import GHC.Generics
 14 | 
 15 | genericUpdateHashTestA :: TestA -> LH ()
 16 | genericUpdateHashTestA = genericUpdateHash
 17 | 
 18 | test_genericProductGetsOptimized :: IO ()
 19 | test_genericProductGetsOptimized =
 20 |     case $(inspectTest (hasNoGenerics 'genericUpdateHashTestA)) of
 21 |       Success _ -> return ()
 22 |       Failure e -> assertFailure e
 23 | 
 24 | data SumTest
 25 |     = A Int
 26 |     | B Char
 27 |     | C Integer
 28 |     | D (Either Int Char)
 29 |     deriving (Generic)
 30 | 
 31 | $(deriveLargeHashable ''SumTest)
 32 | 
 33 | genericUpdateHashSum :: SumTest -> LH ()
 34 | genericUpdateHashSum = genericUpdateHash
 35 | 
 36 | thUpdateHashSum :: SumTest -> LH ()
 37 | thUpdateHashSum = updateHash
 38 | 
 39 | test_genericSumGetsOptimized :: IO ()
 40 | test_genericSumGetsOptimized =
 41 |     unitTestPending' "This currently doesn't hold" $
 42 |     case $(inspectTest (hasNoGenerics 'genericUpdateHashSum)) of
 43 |       Success _ -> return ()
 44 |       Failure e -> assertFailure e
 45 | 
 46 | test_genericSumEqTH :: IO ()
 47 | test_genericSumEqTH =
 48 |     unitTestPending' "This currently doesn't hold" $
 49 |     case $(inspectTest ('genericUpdateHashSum === 'thUpdateHashSum)) of
 50 |       Success _ -> return ()
 51 |       Failure e -> assertFailure e
 52 | 
 53 | data SopTest
 54 |     = A2 Char Int Bool
 55 |     | B2 Int Bool
 56 |     | C2 (Char, Int) (Maybe Char)
 57 |     | D2 (Either Int Char) (Char, Int, Integer)
 58 |     deriving (Generic)
 59 | 
 60 | $(deriveLargeHashable ''SopTest)
 61 | 
 62 | genericUpdateHashSop :: SopTest -> LH ()
 63 | genericUpdateHashSop = genericUpdateHash
 64 | 
 65 | thUpdateHashSop :: SopTest -> LH ()
 66 | thUpdateHashSop = updateHash
 67 | 
 68 | test_genericSumOfProductsGetsOptimized :: IO ()
 69 | test_genericSumOfProductsGetsOptimized =
 70 |     unitTestPending' "This currently doesn't hold" $
 71 |     case $(inspectTest (hasNoGenerics 'genericUpdateHashSop)) of
 72 |       Success _ -> return ()
 73 |       Failure e -> assertFailure e
 74 | 
 75 | test_genericSopEqTH :: IO ()
 76 | test_genericSopEqTH =
 77 |     unitTestPending' "This currently doesn't hold" $
 78 |     case $(inspectTest ('genericUpdateHashSop === 'thUpdateHashSop)) of
 79 |       Success _ -> return ()
 80 |       Failure e -> assertFailure e
 81 | 
 82 | data UnitTest = UnitTest deriving (Generic)
 83 | 
 84 | $(deriveLargeHashable ''UnitTest)
 85 | 
 86 | genericUpdateHashUnitType :: UnitTest -> LH ()
 87 | genericUpdateHashUnitType = genericUpdateHash
 88 | 
 89 | thUpdateHashUnitType :: UnitTest -> LH ()
 90 | thUpdateHashUnitType = updateHash
 91 | 
 92 | unitTypeReturn :: UnitTest -> LH ()
 93 | unitTypeReturn UnitTest = return ()
 94 | 
 95 | test_genericUnitHashIsNoop :: IO ()
 96 | test_genericUnitHashIsNoop =
 97 |     case $(inspectTest ('genericUpdateHashUnitType === 'unitTypeReturn)) of
 98 |       Success _ -> return ()
 99 |       Failure e -> assertFailure e
100 | 
101 | test_thUnitHashIsNoop :: IO ()
102 | test_thUnitHashIsNoop =
103 |     case $(inspectTest ('thUpdateHashUnitType === 'unitTypeReturn)) of
104 |       Success _ -> return ()
105 |       Failure e -> assertFailure e
106 | 
107 | updateHashHaskellUnit :: () -> LH ()
108 | updateHashHaskellUnit () =
109 |     -- I have to do this twice for inlining to work
110 |     do updateHash ()
111 |        updateHash ()
112 | 
113 | haskellUnitReturn :: () -> LH ()
114 | haskellUnitReturn () = return ()
115 | 
116 | test_haskellUnitHashIsNoop :: IO ()
117 | test_haskellUnitHashIsNoop =
118 |     case $(inspectTest ('updateHashHaskellUnit === 'haskellUnitReturn)) of
119 |       Success _ -> return ()
120 |       Failure e -> assertFailure e
121 | 


--------------------------------------------------------------------------------
/test/Data/LargeHashable/Tests/LargeWord.hs:
--------------------------------------------------------------------------------
 1 | {-# OPTIONS_GHC -F -pgmF htfpp #-}
 2 | module Data.LargeHashable.Tests.LargeWord where
 3 | 
 4 | -- keep imports in alphabetic order (in Emacs, use "M-x sort-lines")
 5 | import Data.LargeHashable.LargeWord
 6 | import Data.LargeHashable.Tests.Helper ()
 7 | import Test.Framework
 8 | import qualified Data.ByteString as BS
 9 | 
10 | prop_w128BsHasLength16 :: Word128 -> Bool
11 | prop_w128BsHasLength16 x = BS.length (w128ToBs x) == 16
12 | 
13 | prop_conversionFromAndToWord128 :: Word128 -> Bool
14 | prop_conversionFromAndToWord128 h128 = h128 == bsToW128 (w128ToBs h128)
15 | 
16 | prop_conversionFromAndToWord256 :: Word256 -> Bool
17 | prop_conversionFromAndToWord256 h256 = h256 == bsToW256 (w256ToBs h256)
18 | 
19 | test_bsToW128 :: IO ()
20 | test_bsToW128 =
21 |     do assertEqual (Word128 0 0) (bsToW128 BS.empty)
22 |        assertEqual (bsToW128 (BS.pack ([0,0,0,0,0,0,0,1] ++ replicate 8 0))) (bsToW128 (BS.pack [1]))
23 |        assertEqual (bsToW128 (BS.pack ([1,2,3,4,5,6,7,8,0,0,0,0,0,0,0,9])))
24 |            (bsToW128 (BS.pack [1,2,3,4,5,6,7,8,9]))
25 | 


--------------------------------------------------------------------------------
/test/Data/LargeHashable/Tests/MD5.hs:
--------------------------------------------------------------------------------
 1 | {-# OPTIONS_GHC -F -pgmF htfpp #-}
 2 | module Data.LargeHashable.Tests.MD5 where
 3 | 
 4 | import Data.LargeHashable.LargeWord
 5 | import Data.LargeHashable.MD5
 6 | import Data.LargeHashable.Tests.Helper ()
 7 | import Test.Framework
 8 | 
 9 | prop_readShowMD5Hash :: Word128 -> Bool
10 | prop_readShowMD5Hash w =
11 |     let h = MD5Hash w
12 |     in h == read (show h)
13 | 


--------------------------------------------------------------------------------
/test/Data/LargeHashable/Tests/Stable.hs:
--------------------------------------------------------------------------------
 1 | {-# LANGUAGE  OverloadedStrings #-}
 2 | {-# OPTIONS_GHC -F -pgmF htfpp #-}
 3 | module Data.LargeHashable.Tests.Stable where
 4 | 
 5 | import Data.LargeHashable
 6 | import qualified Data.Text as T
 7 | import qualified Data.Text.Lazy as TL
 8 | import Test.Framework
 9 | 
10 | test_strictTextBackwardsCompat :: IO ()
11 | test_strictTextBackwardsCompat = do
12 |     -- we check that largeHashStable returns the same hash as for text version 1.*
13 |     assertEqual (read "cc1ea58b3c2ec61d19c4ce222bc9e55d")
14 |         (largeHashStable md5HashAlgorithm (T.pack "Stefan"))
15 | 
16 | test_lazyTextBackwardsCompat :: IO ()
17 | test_lazyTextBackwardsCompat = do
18 |     -- we check that largeHashStable returns the same hash as for text version 1.*
19 |     assertEqual (read "cc1ea58b3c2ec61d19c4ce222bc9e55d")
20 |         (largeHashStable md5HashAlgorithm (TL.pack "Stefan"))
21 | 
22 | test_lazyTextStableChunking :: IO ()
23 | test_lazyTextStableChunking = do
24 |     assertEqual (largeHashStable md5HashAlgorithm (TL.fromChunks ["foo", "ba", "r"]))
25 |                 (largeHashStable md5HashAlgorithm (TL.fromChunks ["foob", "ar"]))
26 | 


--------------------------------------------------------------------------------
/test/Data/LargeHashable/Tests/TH.hs:
--------------------------------------------------------------------------------
  1 | {-# LANGUAGE GADTs #-}
  2 | {-# LANGUAGE TemplateHaskell #-}
  3 | {-# OPTIONS_GHC -F -pgmF htfpp #-}
  4 | {-# OPTIONS_GHC -ddump-splices #-}
  5 | module Data.LargeHashable.Tests.TH where
  6 | 
  7 | import Test.Framework
  8 | 
  9 | import Data.LargeHashable
 10 | 
 11 | -- | Simple test data structure that embodies most
 12 | --   of the diferrent features of a type
 13 | --   the TH deriver can encounter.
 14 | data BlaFoo a
 15 |     = Foo
 16 |     | Bar Int a
 17 |     | Baz a a a
 18 |     deriving (Eq, Show)
 19 | 
 20 | $(deriveLargeHashable ''BlaFoo)
 21 | 
 22 | instance Arbitrary a => Arbitrary (BlaFoo a) where
 23 |     arbitrary = oneof [ pure Foo
 24 |                       , Bar <$> arbitrary <*> arbitrary
 25 |                       , Baz <$> arbitrary <*> arbitrary <*> arbitrary
 26 |                       ]
 27 | 
 28 | -- | Simple property that tries to find hash collisions.
 29 | prop_thDerivedHashUnique :: BlaFoo Char -> BlaFoo Char -> Bool
 30 | prop_thDerivedHashUnique x y = (x == y) == (largeHash md5HashAlgorithm x == largeHash md5HashAlgorithm y)
 31 | 
 32 | prop_thDerivedHashStableUnique :: BlaFoo Char -> BlaFoo Char -> Bool
 33 | prop_thDerivedHashStableUnique x y =
 34 |     (x == y) == (largeHashStable md5HashAlgorithm x == largeHashStable md5HashAlgorithm y)
 35 | 
 36 | newtype Fool = Fool { unFool :: Bool }
 37 | 
 38 | $(deriveLargeHashable ''Fool)
 39 | 
 40 | -- | Simple sanity check for TH derived instances for newtypes.
 41 | test_newtypeTHHashSane :: IO ()
 42 | test_newtypeTHHashSane = assertNotEqual (largeHash md5HashAlgorithm (Fool True))
 43 |                                         (largeHash md5HashAlgorithm (Fool False))
 44 | 
 45 | test_newtypeTHHashStableSane :: IO ()
 46 | test_newtypeTHHashStableSane = assertNotEqual (largeHashStable md5HashAlgorithm (Fool True))
 47 |                                               (largeHashStable md5HashAlgorithm (Fool False))
 48 | 
 49 | data HigherKinded t = HigherKinded (t String)
 50 | 
 51 | instance LargeHashable' BlaFoo where
 52 |     updateHash' = updateHash
 53 |     updateHashStable' = updateHashStable
 54 | 
 55 | instance LargeHashable' t => LargeHashable (HigherKinded t) where
 56 |     updateHash (HigherKinded x) =
 57 |         updateHash' x
 58 |     updateHashStable (HigherKinded x) =
 59 |         updateHashStable' x
 60 | 
 61 | test_higherKinded :: IO ()
 62 | test_higherKinded =
 63 |     assertNotEqual
 64 |         (largeHash md5HashAlgorithm (HigherKinded (Bar 42 "Stefan")))
 65 |         (largeHash md5HashAlgorithm (HigherKinded (Bar 5 "Stefan")))
 66 | 
 67 | test_higherKindedStable :: IO ()
 68 | test_higherKindedStable =
 69 |     assertNotEqual
 70 |         (largeHashStable md5HashAlgorithm (HigherKinded (Bar 42 "Stefan")))
 71 |         (largeHashStable md5HashAlgorithm (HigherKinded (Bar 5 "Stefan")))
 72 | 
 73 | data GadtNoArgs where
 74 |     GadtNoArgsA :: Int -> Char -> GadtNoArgs
 75 |     GadtNoArgsB :: Integer -> GadtNoArgs
 76 | 
 77 | $(deriveLargeHashable ''GadtNoArgs)
 78 | 
 79 | test_gadtNoArgs :: IO ()
 80 | test_gadtNoArgs =
 81 |     assertNotEqual
 82 |         (largeHash md5HashAlgorithm (GadtNoArgsA 1 'a'))
 83 |         (largeHash md5HashAlgorithm (GadtNoArgsB 1))
 84 | 
 85 | data GadtOneArg a where
 86 |     GadtOneArgA :: Int -> Char -> GadtOneArg Integer
 87 |     GadtOneArgB :: Integer -> GadtOneArg Char
 88 | 
 89 | $(deriveLargeHashable ''GadtOneArg)
 90 | 
 91 | test_gadtOneArg :: IO ()
 92 | test_gadtOneArg =
 93 |     assertNotEqual
 94 |         (largeHash md5HashAlgorithm (GadtOneArgA 1 'a'))
 95 |         (largeHash md5HashAlgorithm (GadtOneArgB 1))
 96 | 
 97 | 
 98 | data GadtMultipleArgs a b c where
 99 |     GadtMultipleArgsA :: Int -> Char -> GadtMultipleArgs String Integer Int
100 |     GadtMultipleArgsB :: Int -> GadtMultipleArgs Char String Integer
101 | 
102 | $(deriveLargeHashable ''GadtMultipleArgs)
103 | 
104 | test_gadtMultipleArgs :: IO ()
105 | test_gadtMultipleArgs =
106 |     assertNotEqual
107 |         (largeHash md5HashAlgorithm (GadtMultipleArgsA (1::Int) 'a'))
108 |         (largeHash md5HashAlgorithm (GadtMultipleArgsB (1::Int)))
109 | 


--------------------------------------------------------------------------------
/test/Main.hs:
--------------------------------------------------------------------------------
 1 | {-# OPTIONS_GHC -F -pgmF htfpp #-}
 2 | module Main where
 3 | 
 4 | import Test.Framework
 5 | 
 6 | -- In Emacs sort block with M-x sort-lines
 7 | import {-@ HTF_TESTS @-} Data.LargeHashable.Tests.Class
 8 | import {-@ HTF_TESTS @-} Data.LargeHashable.Tests.Inspection
 9 | import {-@ HTF_TESTS @-} Data.LargeHashable.Tests.LargeWord
10 | import {-@ HTF_TESTS @-} Data.LargeHashable.Tests.MD5
11 | import {-@ HTF_TESTS @-} Data.LargeHashable.Tests.Stable
12 | import {-@ HTF_TESTS @-} Data.LargeHashable.Tests.TH
13 | 
14 | allTests :: [TestSuite]
15 | allTests = htf_importedTests
16 | 
17 | main :: IO ()
18 | main = htfMain allTests
19 | 


--------------------------------------------------------------------------------