├── .github └── workflows │ └── haskell-ci.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── Setup.lhs ├── bench └── simple │ ├── Blocks.hs │ ├── LICENSE │ ├── Main.hs │ └── RadSieve.hs ├── cabal.haskell-ci ├── cabal.project ├── include └── vector.h ├── src └── Data │ └── Vector │ ├── Algorithms.hs │ └── Algorithms │ ├── AmericanFlag.hs │ ├── Combinators.hs │ ├── Common.hs │ ├── Heap.hs │ ├── Insertion.hs │ ├── Intro.hs │ ├── Merge.hs │ ├── Optimal.hs │ ├── Radix.hs │ ├── Search.hs │ └── Tim.hs ├── tests └── properties │ ├── Optimal.hs │ ├── Properties.hs │ ├── Tests.hs │ └── Util.hs └── vector-algorithms.cabal /.github/workflows/haskell-ci.yml: -------------------------------------------------------------------------------- 1 | # This GitHub workflow config has been generated by a script via 2 | # 3 | # haskell-ci 'github' 'vector-algorithms.cabal' 4 | # 5 | # To regenerate the script (for example after adjusting tested-with) run 6 | # 7 | # haskell-ci regenerate 8 | # 9 | # For more information, see https://github.com/haskell-CI/haskell-ci 10 | # 11 | # version: 0.19.20250115 12 | # 13 | # REGENDATA ("0.19.20250115",["github","vector-algorithms.cabal"]) 14 | # 15 | name: Haskell-CI 16 | on: 17 | push: 18 | branches: 19 | - master 20 | pull_request: 21 | branches: 22 | - master 23 | jobs: 24 | linux: 25 | name: Haskell-CI - Linux - ${{ matrix.compiler }} 26 | runs-on: ubuntu-20.04 27 | timeout-minutes: 28 | 60 29 | container: 30 | image: buildpack-deps:jammy 31 | continue-on-error: ${{ matrix.allow-failure }} 32 | strategy: 33 | matrix: 34 | include: 35 | - compiler: ghc-9.12.1 36 | compilerKind: ghc 37 | compilerVersion: 9.12.1 38 | setup-method: ghcup 39 | allow-failure: false 40 | - compiler: ghc-9.10.1 41 | compilerKind: ghc 42 | compilerVersion: 9.10.1 43 | setup-method: ghcup 44 | allow-failure: false 45 | - compiler: ghc-9.8.2 46 | compilerKind: ghc 47 | compilerVersion: 9.8.2 48 | setup-method: ghcup 49 | allow-failure: false 50 | - compiler: ghc-9.6.3 51 | compilerKind: ghc 52 | compilerVersion: 9.6.3 53 | setup-method: ghcup 54 | allow-failure: false 55 | - compiler: ghc-9.4.7 56 | compilerKind: ghc 57 | compilerVersion: 9.4.7 58 | setup-method: ghcup 59 | allow-failure: false 60 | - compiler: ghc-9.2.8 61 | compilerKind: ghc 62 | compilerVersion: 9.2.8 63 | setup-method: ghcup 64 | allow-failure: false 65 | - compiler: ghc-9.0.2 66 | compilerKind: ghc 67 | compilerVersion: 9.0.2 68 | setup-method: ghcup 69 | allow-failure: false 70 | - compiler: ghc-8.10.7 71 | compilerKind: ghc 72 | compilerVersion: 8.10.7 73 | setup-method: ghcup 74 | allow-failure: false 75 | - compiler: ghc-8.8.4 76 | compilerKind: ghc 77 | compilerVersion: 8.8.4 78 | setup-method: ghcup 79 | allow-failure: false 80 | - compiler: ghc-8.6.5 81 | compilerKind: ghc 82 | compilerVersion: 8.6.5 83 | setup-method: ghcup 84 | allow-failure: false 85 | - compiler: ghc-8.4.4 86 | compilerKind: ghc 87 | compilerVersion: 8.4.4 88 | setup-method: ghcup 89 | allow-failure: false 90 | - compiler: ghc-8.2.2 91 | compilerKind: ghc 92 | compilerVersion: 8.2.2 93 | setup-method: ghcup 94 | allow-failure: false 95 | fail-fast: false 96 | steps: 97 | - name: apt-get install 98 | run: | 99 | apt-get update 100 | apt-get install -y --no-install-recommends gnupg ca-certificates dirmngr curl git software-properties-common libtinfo5 libnuma-dev 101 | - name: Install GHCup 102 | run: | 103 | mkdir -p "$HOME/.ghcup/bin" 104 | curl -sL https://downloads.haskell.org/ghcup/0.1.30.0/x86_64-linux-ghcup-0.1.30.0 > "$HOME/.ghcup/bin/ghcup" 105 | chmod a+x "$HOME/.ghcup/bin/ghcup" 106 | - name: Install cabal-install 107 | run: | 108 | "$HOME/.ghcup/bin/ghcup" install cabal 3.12.1.0 || (cat "$HOME"/.ghcup/logs/*.* && false) 109 | echo "CABAL=$HOME/.ghcup/bin/cabal-3.12.1.0 -vnormal+nowrap" >> "$GITHUB_ENV" 110 | - name: Install GHC (GHCup) 111 | if: matrix.setup-method == 'ghcup' 112 | run: | 113 | "$HOME/.ghcup/bin/ghcup" install ghc "$HCVER" || (cat "$HOME"/.ghcup/logs/*.* && false) 114 | HC=$("$HOME/.ghcup/bin/ghcup" whereis ghc "$HCVER") 115 | HCPKG=$(echo "$HC" | sed 's#ghc$#ghc-pkg#') 116 | HADDOCK=$(echo "$HC" | sed 's#ghc$#haddock#') 117 | echo "HC=$HC" >> "$GITHUB_ENV" 118 | echo "HCPKG=$HCPKG" >> "$GITHUB_ENV" 119 | echo "HADDOCK=$HADDOCK" >> "$GITHUB_ENV" 120 | env: 121 | HCKIND: ${{ matrix.compilerKind }} 122 | HCNAME: ${{ matrix.compiler }} 123 | HCVER: ${{ matrix.compilerVersion }} 124 | - name: Set PATH and environment variables 125 | run: | 126 | echo "$HOME/.cabal/bin" >> $GITHUB_PATH 127 | echo "LANG=C.UTF-8" >> "$GITHUB_ENV" 128 | echo "CABAL_DIR=$HOME/.cabal" >> "$GITHUB_ENV" 129 | echo "CABAL_CONFIG=$HOME/.cabal/config" >> "$GITHUB_ENV" 130 | HCNUMVER=$(${HC} --numeric-version|perl -ne '/^(\d+)\.(\d+)\.(\d+)(\.(\d+))?$/; print(10000 * $1 + 100 * $2 + ($3 == 0 ? $5 != 1 : $3))') 131 | echo "HCNUMVER=$HCNUMVER" >> "$GITHUB_ENV" 132 | echo "ARG_TESTS=--enable-tests" >> "$GITHUB_ENV" 133 | echo "ARG_BENCH=--enable-benchmarks" >> "$GITHUB_ENV" 134 | echo "HEADHACKAGE=false" >> "$GITHUB_ENV" 135 | echo "ARG_COMPILER=--$HCKIND --with-compiler=$HC" >> "$GITHUB_ENV" 136 | env: 137 | HCKIND: ${{ matrix.compilerKind }} 138 | HCNAME: ${{ matrix.compiler }} 139 | HCVER: ${{ matrix.compilerVersion }} 140 | - name: env 141 | run: | 142 | env 143 | - name: write cabal config 144 | run: | 145 | mkdir -p $CABAL_DIR 146 | cat >> $CABAL_CONFIG <> $CABAL_CONFIG < cabal-plan.xz 179 | echo 'f62ccb2971567a5f638f2005ad3173dba14693a45154c1508645c52289714cb2 cabal-plan.xz' | sha256sum -c - 180 | xz -d < cabal-plan.xz > $HOME/.cabal/bin/cabal-plan 181 | rm -f cabal-plan.xz 182 | chmod a+x $HOME/.cabal/bin/cabal-plan 183 | cabal-plan --version 184 | - name: checkout 185 | uses: actions/checkout@v4 186 | with: 187 | path: source 188 | - name: initial cabal.project for sdist 189 | run: | 190 | touch cabal.project 191 | echo "packages: $GITHUB_WORKSPACE/source/." >> cabal.project 192 | cat cabal.project 193 | - name: sdist 194 | run: | 195 | mkdir -p sdist 196 | $CABAL sdist all --output-dir $GITHUB_WORKSPACE/sdist 197 | - name: unpack 198 | run: | 199 | mkdir -p unpacked 200 | find sdist -maxdepth 1 -type f -name '*.tar.gz' -exec tar -C $GITHUB_WORKSPACE/unpacked -xzvf {} \; 201 | - name: generate cabal.project 202 | run: | 203 | PKGDIR_vector_algorithms="$(find "$GITHUB_WORKSPACE/unpacked" -maxdepth 1 -type d -regex '.*/vector-algorithms-[0-9.]*')" 204 | echo "PKGDIR_vector_algorithms=${PKGDIR_vector_algorithms}" >> "$GITHUB_ENV" 205 | rm -f cabal.project cabal.project.local 206 | touch cabal.project 207 | touch cabal.project.local 208 | echo "packages: ${PKGDIR_vector_algorithms}" >> cabal.project 209 | echo "package vector-algorithms" >> cabal.project 210 | echo " ghc-options: -Werror=missing-methods" >> cabal.project 211 | cat >> cabal.project <> cabal.project.local 214 | cat cabal.project 215 | cat cabal.project.local 216 | - name: dump install plan 217 | run: | 218 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dry-run all 219 | cabal-plan 220 | - name: restore cache 221 | uses: actions/cache/restore@v4 222 | with: 223 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 224 | path: ~/.cabal/store 225 | restore-keys: ${{ runner.os }}-${{ matrix.compiler }}- 226 | - name: install dependencies 227 | run: | 228 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks --dependencies-only -j2 all 229 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH --dependencies-only -j2 all 230 | - name: build w/o tests 231 | run: | 232 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 233 | - name: build 234 | run: | 235 | $CABAL v2-build $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --write-ghc-environment-files=always 236 | - name: tests 237 | run: | 238 | $CABAL v2-test $ARG_COMPILER $ARG_TESTS $ARG_BENCH all --test-show-details=direct 239 | - name: cabal check 240 | run: | 241 | cd ${PKGDIR_vector_algorithms} || false 242 | ${CABAL} -vnormal check 243 | - name: haddock 244 | run: | 245 | $CABAL v2-haddock --disable-documentation --haddock-all $ARG_COMPILER --with-haddock $HADDOCK $ARG_TESTS $ARG_BENCH all 246 | - name: unconstrained build 247 | run: | 248 | rm -f cabal.project.local 249 | $CABAL v2-build $ARG_COMPILER --disable-tests --disable-benchmarks all 250 | - name: save cache 251 | if: always() 252 | uses: actions/cache/save@v4 253 | with: 254 | key: ${{ runner.os }}-${{ matrix.compiler }}-${{ github.sha }} 255 | path: ~/.cabal/store 256 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | dist-* 3 | cabal-dev 4 | *.o 5 | *.hi 6 | *.chi 7 | *.chs.h 8 | *.dyn_o 9 | *.dyn_hi 10 | .hpc 11 | .hsenv 12 | .cabal-sandbox/ 13 | cabal.sandbox.config 14 | *.prof 15 | *.aux 16 | *.hp 17 | *.eventlog 18 | .stack-work/ 19 | cabal.project.local 20 | cabal.project.local~ 21 | .HTF/ 22 | .ghc.environment.* 23 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## Version 0.9.1.0 (2025-02-05) 2 | 3 | - More inlining for `sort` and `nib` functions. 4 | 5 | ## Version 0.9.0.3 (2024-11-25) 6 | 7 | - Fix an off-by-one error Heap.partialSort functions. 8 | - Support latest ghcs. 9 | 10 | ## Version 0.9.0.2 (2024-05-23) 11 | 12 | - Add `TypeOperators` pragma where needed. 13 | 14 | ## Version 0.9.0.1 (2022-07-28) 15 | 16 | - Allow building with vector-0.13.*. 17 | 18 | ## Version 0.9.0.0 (2022-05-19) 19 | 20 | - Add nub related functions. 21 | - Add sortUniq related functions (sorts, then removes duplicates). 22 | 23 | ## Version 0.8.0.4 (2020-12-06) 24 | 25 | - Fix out of range access in Intro.partialSort. 26 | - Update QuickCheck dependency bounds. 27 | 28 | ## Version 0.8.0.3 (2019-12-02) 29 | 30 | - Fix out-of-bounds access in Timsort. 31 | 32 | ## Version 0.8.0.2 (2019-11-28) 33 | 34 | - Bump upper bounds on primitive and QuickCheck. 35 | - Expose 'terminate' function from 'AmericanFlag' module. 36 | - Fix an off-by-one error in Data.Vector.Algorithms.Heaps.heapInsert. 37 | 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015 Dan Doel 2 | Copyright (c) 2015 Tim Baumann 3 | 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions 8 | are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | 17 | 3. Neither the name of the author nor the names of his contributors 18 | may be used to endorse or promote products derived from this software 19 | without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 22 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 29 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 30 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 | POSSIBILITY OF SUCH DAMAGE. 32 | 33 | ------------------------------------------------------------------------------ 34 | 35 | The code in Data.Array.Vector.Algorithms.Mutable.Optimal is adapted from a C 36 | algorithm for the same purpose. The following is the copyright notice for said 37 | C code: 38 | 39 | Copyright (c) 2004 Paul Hsieh 40 | All rights reserved. 41 | 42 | Redistribution and use in source and binary forms, with or without 43 | modification, are permitted provided that the following conditions are met: 44 | 45 | Redistributions of source code must retain the above copyright notice, 46 | this list of conditions and the following disclaimer. 47 | 48 | Redistributions in binary form must reproduce the above copyright notice, 49 | this list of conditions and the following disclaimer in the documentation 50 | and/or other materials provided with the distribution. 51 | 52 | Neither the name of sorttest nor the names of its contributors may be 53 | used to endorse or promote products derived from this software without 54 | specific prior written permission. 55 | 56 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 57 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 58 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 59 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 60 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 61 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 62 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 63 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 64 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 65 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 66 | POSSIBILITY OF SUCH DAMAGE. 67 | -------------------------------------------------------------------------------- /Setup.lhs: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env runhaskell 2 | > import Distribution.Simple 3 | > main = defaultMain 4 | -------------------------------------------------------------------------------- /bench/simple/Blocks.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Rank2Types #-} 2 | 3 | module Blocks where 4 | 5 | import Control.Monad 6 | import Control.Monad.ST 7 | 8 | import Data.Vector.Unboxed.Mutable 9 | 10 | import System.CPUTime 11 | 12 | import System.Random.MWC (GenIO, Variate(..)) 13 | 14 | -- Some conveniences for doing evil stuff in the ST monad. 15 | -- All the tests get run in IO, but uvector stuff happens 16 | -- in ST, so we temporarily coerce. 17 | clock :: IO Integer 18 | clock = getCPUTime 19 | 20 | -- Strategies for filling the initial arrays 21 | rand :: Variate e => GenIO -> Int -> IO e 22 | rand g _ = uniform g 23 | 24 | ascend :: Num e => Int -> IO e 25 | ascend = return . fromIntegral 26 | 27 | descend :: Num e => e -> Int -> IO e 28 | descend m n = return $ m - fromIntegral n 29 | 30 | modulo :: Integral e => e -> Int -> IO e 31 | modulo m n = return $ fromIntegral n `mod` m 32 | 33 | -- This is the worst case for the median-of-three quicksort 34 | -- used in the introsort implementation. 35 | medianKiller :: Integral e => e -> Int -> IO e 36 | medianKiller m n' 37 | | n < k = return $ if even n then n + 1 else n + k 38 | | otherwise = return $ (n - k + 1) * 2 39 | where 40 | n = fromIntegral n' 41 | k = m `div` 2 42 | {-# INLINE medianKiller #-} 43 | 44 | initialize :: (Unbox e) => MVector RealWorld e -> Int -> (Int -> IO e) -> IO () 45 | initialize arr len fill = initial $ len - 1 46 | where initial n = fill n >>= unsafeWrite arr n >> when (n > 0) (initial $ n - 1) 47 | {-# INLINE initialize #-} 48 | 49 | speedTest :: (Unbox e) => MVector RealWorld e 50 | -> Int 51 | -> (Int -> IO e) 52 | -> (MVector RealWorld e -> IO ()) 53 | -> IO Integer 54 | speedTest arr n fill algo = do 55 | initialize arr n fill 56 | t0 <- clock 57 | algo arr 58 | t1 <- clock 59 | return $ t1 - t0 60 | {-# INLINE speedTest #-} 61 | 62 | 63 | -------------------------------------------------------------------------------- /bench/simple/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009 Dan Doel 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the author nor the names of his contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 21 | IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR 24 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 28 | STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 | ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 | POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /bench/simple/Main.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Rank2Types #-} 2 | 3 | module Main (main) where 4 | 5 | import Prelude hiding (read, length) 6 | import qualified Prelude as P 7 | 8 | import Control.Monad 9 | import Control.Monad.ST 10 | 11 | import Data.Char 12 | import Data.Ord (comparing) 13 | import Data.List (maximumBy) 14 | 15 | import qualified Data.Vector.Unboxed.Mutable as UVector 16 | import Data.Vector.Unboxed.Mutable (MVector, Unbox) 17 | 18 | import qualified Data.Vector.Algorithms.Insertion as INS 19 | import qualified Data.Vector.Algorithms.Intro as INT 20 | import qualified Data.Vector.Algorithms.Heap as H 21 | import qualified Data.Vector.Algorithms.Merge as M 22 | import qualified Data.Vector.Algorithms.Radix as R 23 | import qualified Data.Vector.Algorithms.AmericanFlag as AF 24 | import qualified Data.Vector.Algorithms.Tim as T 25 | 26 | import System.Environment 27 | import System.Console.GetOpt 28 | import System.Random.MWC 29 | 30 | import Blocks 31 | 32 | -- Does nothing. For testing the speed/heap allocation of the building blocks. 33 | noalgo :: (Unbox e) => MVector RealWorld e -> IO () 34 | noalgo _ = return () 35 | 36 | -- Allocates a temporary buffer, like mergesort for similar purposes as noalgo. 37 | alloc :: (Unbox e) => MVector RealWorld e -> IO () 38 | alloc arr | len <= 4 = arr `seq` return () 39 | | otherwise = (UVector.new (len `div` 2) :: IO (MVector RealWorld Int)) >> return () 40 | where len = UVector.length arr 41 | 42 | displayTime :: String -> Integer -> IO () 43 | displayTime s elapsed = putStrLn $ 44 | s ++ " : " ++ show (fromIntegral elapsed / (1e12 :: Double)) ++ " seconds" 45 | 46 | run :: String -> IO Integer -> IO () 47 | run s t = t >>= displayTime s 48 | 49 | sortSuite :: String -> GenIO -> Int -> (MVector RealWorld Int -> IO ()) -> IO () 50 | sortSuite str g n sort = do 51 | arr <- UVector.new n 52 | putStrLn $ "Testing: " ++ str 53 | run "Random " $ speedTest arr n (rand g >=> modulo n) sort 54 | run "Sorted " $ speedTest arr n ascend sort 55 | run "Reverse-sorted " $ speedTest arr n (descend n) sort 56 | run "Random duplicates " $ speedTest arr n (rand g >=> modulo 1000) sort 57 | let m = 4 * (n `div` 4) 58 | run "Median killer " $ speedTest arr m (medianKiller m) sort 59 | 60 | partialSortSuite :: String -> GenIO -> Int -> Int 61 | -> (MVector RealWorld Int -> Int -> IO ()) -> IO () 62 | partialSortSuite str g n k sort = sortSuite str g n (\a -> sort a k) 63 | 64 | -- ----------------- 65 | -- Argument handling 66 | -- ----------------- 67 | 68 | data Algorithm = DoNothing 69 | | Allocate 70 | | InsertionSort 71 | | IntroSort 72 | | IntroPartialSort 73 | | IntroSelect 74 | | HeapSort 75 | | HeapPartialSort 76 | | HeapSelect 77 | | MergeSort 78 | | RadixSort 79 | | AmericanFlagSort 80 | | TimSort 81 | deriving (Show, Read, Enum, Bounded) 82 | 83 | data Options = O { algos :: [Algorithm], elems :: Int, portion :: Int, usage :: Bool } deriving (Show) 84 | 85 | defaultOptions :: Options 86 | defaultOptions = O [] 10000 1000 False 87 | 88 | type OptionsT = Options -> Either String Options 89 | 90 | options :: [OptDescr OptionsT] 91 | options = [ Option ['A'] ["algorithm"] (ReqArg parseAlgo "ALGO") 92 | ("Specify an algorithm to be run. Options:\n" ++ algoOpts) 93 | , Option ['n'] ["num-elems"] (ReqArg parseN "INT") 94 | "Specify the size of arrays in algorithms." 95 | , Option ['k'] ["portion"] (ReqArg parseK "INT") 96 | "Specify the number of elements to partial sort/select in\nrelevant algorithms." 97 | , Option ['?','v'] ["help"] (NoArg $ \o -> Right $ o { usage = True }) 98 | "Show options." 99 | ] 100 | where 101 | allAlgos :: [Algorithm] 102 | allAlgos = [minBound .. maxBound] 103 | algoOpts = fmt allAlgos 104 | fmt (x:y:zs) = '\t' : pad (show x) ++ show y ++ "\n" ++ fmt zs 105 | fmt [x] = '\t' : show x ++ "\n" 106 | fmt [] = "" 107 | size = (" " ++) . maximumBy (comparing P.length) . map show $ allAlgos 108 | pad str = zipWith const (str ++ repeat ' ') size 109 | 110 | parseAlgo :: String -> Options -> Either String Options 111 | parseAlgo "None" o = Right $ o { algos = [] } 112 | parseAlgo "All" o = Right $ o { algos = [DoNothing .. AmericanFlagSort] } 113 | parseAlgo s o = leftMap (\e -> "Unrecognized algorithm `" ++ e ++ "'") 114 | . fmap (\v -> o { algos = v : algos o }) $ readEither s 115 | 116 | leftMap :: (a -> b) -> Either a c -> Either b c 117 | leftMap f (Left a) = Left (f a) 118 | leftMap _ (Right c) = Right c 119 | 120 | parseNum :: (Int -> Options) -> String -> Either String Options 121 | parseNum f = leftMap (\e -> "Invalid numeric argument `" ++ e ++ "'") . fmap f . readEither 122 | 123 | parseN, parseK :: String -> Options -> Either String Options 124 | parseN s o = parseNum (\n -> o { elems = n }) s 125 | parseK s o = parseNum (\k -> o { portion = k }) s 126 | 127 | readEither :: Read a => String -> Either String a 128 | readEither s = case reads s of 129 | [(x,t)] | all isSpace t -> Right x 130 | _ -> Left s 131 | 132 | runTest :: GenIO -> Int -> Int -> Algorithm -> IO () 133 | runTest g n k alg = case alg of 134 | DoNothing -> sortSuite "no algorithm" g n noalgo 135 | Allocate -> sortSuite "allocate" g n alloc 136 | InsertionSort -> sortSuite "insertion sort" g n insertionSort 137 | IntroSort -> sortSuite "introsort" g n introSort 138 | IntroPartialSort -> partialSortSuite "partial introsort" g n k introPSort 139 | IntroSelect -> partialSortSuite "introselect" g n k introSelect 140 | HeapSort -> sortSuite "heap sort" g n heapSort 141 | HeapPartialSort -> partialSortSuite "partial heap sort" g n k heapPSort 142 | HeapSelect -> partialSortSuite "heap select" g n k heapSelect 143 | MergeSort -> sortSuite "merge sort" g n mergeSort 144 | RadixSort -> sortSuite "radix sort" g n radixSort 145 | AmericanFlagSort -> sortSuite "flag sort" g n flagSort 146 | TimSort -> sortSuite "tim sort" g n timSort 147 | 148 | mergeSort :: MVector RealWorld Int -> IO () 149 | mergeSort v = M.sort v 150 | {-# NOINLINE mergeSort #-} 151 | 152 | introSort :: MVector RealWorld Int -> IO () 153 | introSort v = INT.sort v 154 | {-# NOINLINE introSort #-} 155 | 156 | introPSort :: MVector RealWorld Int -> Int -> IO () 157 | introPSort v k = INT.partialSort v k 158 | {-# NOINLINE introPSort #-} 159 | 160 | introSelect :: MVector RealWorld Int -> Int -> IO () 161 | introSelect v k = INT.select v k 162 | {-# NOINLINE introSelect #-} 163 | 164 | heapSort :: MVector RealWorld Int -> IO () 165 | heapSort v = H.sort v 166 | {-# NOINLINE heapSort #-} 167 | 168 | heapPSort :: MVector RealWorld Int -> Int -> IO () 169 | heapPSort v k = H.partialSort v k 170 | {-# NOINLINE heapPSort #-} 171 | 172 | heapSelect :: MVector RealWorld Int -> Int -> IO () 173 | heapSelect v k = H.select v k 174 | {-# NOINLINE heapSelect #-} 175 | 176 | insertionSort :: MVector RealWorld Int -> IO () 177 | insertionSort v = INS.sort v 178 | {-# NOINLINE insertionSort #-} 179 | 180 | radixSort :: MVector RealWorld Int -> IO () 181 | radixSort v = R.sort v 182 | {-# NOINLINE radixSort #-} 183 | 184 | flagSort :: MVector RealWorld Int -> IO () 185 | flagSort v = AF.sort v 186 | {-# NOINLINE flagSort #-} 187 | 188 | timSort :: MVector RealWorld Int -> IO () 189 | timSort v = T.sort v 190 | {-# NOINLINE timSort #-} 191 | 192 | main :: IO () 193 | main = getArgs >>= \args -> withSystemRandom $ \gen -> 194 | case getOpt Permute options args of 195 | (fs, _, []) -> case foldl (>>=) (Right defaultOptions) fs of 196 | Left err -> putStrLn $ usageInfo err options 197 | Right opts | not (usage opts) -> 198 | mapM_ (runTest gen (elems opts) (portion opts)) (algos opts) 199 | | otherwise -> putStrLn $ usageInfo "vector-algorithms-bench" options 200 | (_, _, errs) -> putStrLn $ usageInfo (concat errs) options 201 | 202 | 203 | -------------------------------------------------------------------------------- /bench/simple/RadSieve.hs: -------------------------------------------------------------------------------- 1 | -- ------------------------------------------------------------------ 2 | -- 3 | -- Module : RadSieve 4 | -- Copyright : (c) 2009 Dan Doel 5 | -- 6 | -- ------------------------------------------------------------------ 7 | -- An implementation of a radical sieve, inspired by solving Project 8 | -- Euler problem #124. 9 | -- 10 | -- Reproduction fo the problem text: 11 | -- 12 | -- The radical of n, rad(n), is the product of distinct prime factors 13 | -- of n. For example, 504 = 23 × 32 × 7, so rad(504) = 2 × 3 × 7 = 42. 14 | -- 15 | -- If we calculate rad(n) for 1 ≤ n ≤ 10, then sort them on rad(n), 16 | -- and sorting on n if the radical values are equal, we get: 17 | -- 18 | -- Unsorted Sorted 19 | -- n rad(n) n rad(n) k 20 | -- 1 1 1 1 1 21 | -- 2 2 2 2 2 22 | -- 3 3 4 2 3 23 | -- 4 2 8 2 4 24 | -- 5 5 3 3 5 25 | -- 6 6 9 3 6 26 | -- 7 7 5 5 7 27 | -- 8 2 6 6 8 28 | -- 9 3 7 7 9 29 | -- 10 10 10 10 10 30 | -- 31 | -- Let E(k) be the kth element in the sorted n column; for example, 32 | -- E(4) = 8 and E(6) = 9. 33 | -- 34 | -- If rad(n) is sorted for 1 ≤ n ≤ 100000, find E(10000). 35 | 36 | module RadSieve where 37 | 38 | import Control.Monad 39 | import Control.Monad.ST 40 | 41 | import Data.Array.Vector 42 | 43 | -- Radicals can be sieved as follows: 44 | -- set a[1,n] = 1 45 | -- for i from 2 to n 46 | -- if a[i] == 1 -- i must be prime 47 | -- then a[j*i] *= i for positive integers j, j*i <= n 48 | -- else do nothing -- i is composite, so its prime factors 49 | -- -- have been accounted for 50 | -- 51 | -- This sieves for radicals up to the given integer. 52 | radSieve :: Int -> ST s (MUArr Int s) 53 | radSieve n = do arr <- newMU (n + 1) 54 | fill arr n 55 | sieve arr 1 56 | return arr 57 | where 58 | fill arr i | i < 0 = return () 59 | | otherwise = writeMU arr i 1 >> fill arr (i-1) 60 | sieve arr i | n < i = return () 61 | | otherwise = do e <- readMU arr i 62 | when (e == 1) $ mark arr i i 63 | sieve arr (i+1) 64 | mark arr p j | n < j = return () 65 | | otherwise = readMU arr j >>= writeMU arr j . (*p) 66 | >> mark arr p (j+p) 67 | 68 | -- Computes the answer to the above Project Euler problem. The correct 69 | -- answer is only generated for a stable sorting function. 70 | stableSortedRad :: Int -> Int 71 | -> (forall s e. UA e => Comparison e -> MUArr e s -> ST s ()) 72 | -> Int 73 | stableSortedRad n k sortBy = runST (do rads <- radSieve n 74 | index <- newMU (n + 1) 75 | fillUp index n 76 | sortBy (comparing fstS) 77 | (unsafeZipMU rads index) 78 | readMU k index) 79 | where 80 | fillUp arr k | k < 0 = return () 81 | | otherwise = writeMU arr k k >> fillUp arr (k-1) 82 | 83 | -- Computes the answer to the above Project Euler problem. This version 84 | -- will generate the correct answer even for unstable sorts, but may be 85 | -- marginally slower. 86 | unstableSortedRad :: Int -> Int 87 | -> (forall s e. UA e => Comparison e -> MUArr e s -> ST s ()) 88 | -> Int 89 | unstableSortedRad n k sortBy = runST (do rads <- radSieve n 90 | index <- newMU (n + 1) 91 | fillUp index n 92 | sortBy compare (unsafeZipMU rads index) 93 | readMU k index) 94 | where 95 | fillUp arr k | k < 0 = return () 96 | | otherwise = writeMU arr k k >> fillUp arr (k-1) 97 | 98 | -------------------------------------------------------------------------------- /cabal.haskell-ci: -------------------------------------------------------------------------------- 1 | branches: master 2 | 3 | -- Tests fail to build with GHC 7.10 4 | tests: >= 8.0 -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: 2 | ./ 3 | 4 | -- Build all tests by default 5 | tests: True 6 | 7 | -- Show full test output 8 | test-show-details: direct 9 | 10 | allow-newer: 11 | , *:base 12 | -------------------------------------------------------------------------------- /include/vector.h: -------------------------------------------------------------------------------- 1 | #define PHASE_STREAM [1] 2 | #define PHASE_INNER [0] 3 | 4 | #define INLINE_STREAM INLINE PHASE_STREAM 5 | #define INLINE_INNER INLINE PHASE_INNER 6 | 7 | #ifndef NOT_VECTOR_MODULE 8 | import qualified Data.Vector.Internal.Check as Ck 9 | #endif 10 | 11 | #define ERROR(f) (Ck.f __FILE__ __LINE__) 12 | #define ASSERT (Ck.assert __FILE__ __LINE__) 13 | #define ENSURE (Ck.f __FILE__ __LINE__) 14 | #define CHECK(f) (Ck.f __FILE__ __LINE__) 15 | 16 | #define BOUNDS_ERROR(f) (ERROR(f) Ck.Bounds) 17 | #define BOUNDS_ASSERT (ASSERT Ck.Bounds) 18 | #define BOUNDS_ENSURE (ENSURE Ck.Bounds) 19 | #define BOUNDS_CHECK(f) (CHECK(f) Ck.Bounds) 20 | 21 | #define UNSAFE_ERROR(f) (ERROR(f) Ck.Unsafe) 22 | #define UNSAFE_ASSERT (ASSERT Ck.Unsafe) 23 | #define UNSAFE_ENSURE (ENSURE Ck.Unsafe) 24 | #define UNSAFE_CHECK(f) (CHECK(f) Ck.Unsafe) 25 | 26 | #define INTERNAL_ERROR(f) (ERROR(f) Ck.Internal) 27 | #define INTERNAL_ASSERT (ASSERT Ck.Internal) 28 | #define INTERNAL_ENSURE (ENSURE Ck.Internal) 29 | #define INTERNAL_CHECK(f) (CHECK(f) Ck.Internal) 30 | 31 | 32 | -------------------------------------------------------------------------------- /src/Data/Vector/Algorithms.hs: -------------------------------------------------------------------------------- 1 | {-# language BangPatterns, RankNTypes, ScopedTypeVariables #-} 2 | module Data.Vector.Algorithms where 3 | 4 | import Prelude hiding (length) 5 | import Control.Monad 6 | import Control.Monad.Primitive 7 | import Control.Monad.ST (runST) 8 | 9 | import Data.Vector.Generic.Mutable 10 | import qualified Data.Vector.Generic as V 11 | import qualified Data.Vector.Unboxed.Mutable as UMV 12 | import qualified Data.Bit as Bit 13 | 14 | import Data.Vector.Algorithms.Common (Comparison) 15 | import Data.Vector.Algorithms.Intro (sortUniqBy) 16 | import qualified Data.Vector.Algorithms.Search as S 17 | 18 | -- | The `nub` function which removes duplicate elements from a vector. 19 | nub :: forall v e . (V.Vector v e, Ord e) => v e -> v e 20 | nub = nubBy compare 21 | {-# INLINE nub #-} 22 | 23 | -- | A version of `nub` with a custom comparison predicate. 24 | -- 25 | -- /Note:/ This function makes use of `sortByUniq` using the intro 26 | -- sort algorithm. 27 | nubBy :: 28 | forall v e . (V.Vector v e) => 29 | Comparison e -> v e -> v e 30 | nubBy cmp vec = runST $ do 31 | mv <- V.unsafeThaw vec -- safe as the nubByMut algorithm copies the input 32 | destMV <- nubByMut sortUniqBy cmp mv 33 | v <- V.unsafeFreeze destMV 34 | pure (V.force v) 35 | {-# INLINE nubBy #-} 36 | 37 | -- | The `nubByMut` function takes in an in-place sort algorithm 38 | -- and uses it to do a de-deduplicated sort. It then uses this to 39 | -- remove duplicate elements from the input. 40 | -- 41 | -- /Note:/ Since this algorithm needs the original input and so 42 | -- copies before sorting in-place. As such, it is safe to use on 43 | -- immutable inputs. 44 | nubByMut :: 45 | forall m v e . (PrimMonad m, MVector v e) => 46 | (Comparison e -> v (PrimState m) e -> m (v (PrimState m) e)) 47 | -> Comparison e -> v (PrimState m) e -> m (v (PrimState m) e) 48 | nubByMut alg cmp inp = do 49 | let len = length inp 50 | inp' <- clone inp 51 | sortUniqs <- alg cmp inp' 52 | let uniqLen = length sortUniqs 53 | bitmask <- UMV.replicate uniqLen (Bit.Bit False) -- bitmask to track which elements have 54 | -- already been seen. 55 | dest :: v (PrimState m) e <- unsafeNew uniqLen -- return vector 56 | let 57 | go :: Int -> Int -> m () 58 | go !srcInd !destInd 59 | | srcInd == len = pure () 60 | | destInd == uniqLen = pure () 61 | | otherwise = do 62 | curr <- unsafeRead inp srcInd -- read current element 63 | sortInd <- S.binarySearchBy cmp sortUniqs curr -- find sorted index 64 | bit <- UMV.unsafeRead bitmask sortInd -- check if we have already seen 65 | -- this element in bitvector 66 | case bit of 67 | -- if we have seen it then iterate 68 | Bit.Bit True -> go (srcInd + 1) destInd 69 | -- if we haven't then write it into output 70 | -- and mark that it has been seen 71 | Bit.Bit False -> do 72 | UMV.unsafeWrite bitmask sortInd (Bit.Bit True) 73 | unsafeWrite dest destInd curr 74 | go (srcInd + 1) (destInd + 1) 75 | go 0 0 76 | pure dest 77 | {-# INLINABLE nubByMut #-} 78 | -------------------------------------------------------------------------------- /src/Data/Vector/Algorithms/AmericanFlag.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE TypeFamilies #-} 3 | {-# LANGUAGE FlexibleContexts #-} 4 | {-# lANGUAGE ScopedTypeVariables #-} 5 | 6 | -- --------------------------------------------------------------------------- 7 | -- | 8 | -- Module : Data.Vector.Algorithms.AmericanFlag 9 | -- Copyright : (c) 2011 Dan Doel 10 | -- Maintainer : Dan Doel 11 | -- Stability : Experimental 12 | -- Portability : Non-portable (FlexibleContexts, ScopedTypeVariables) 13 | -- 14 | -- This module implements American flag sort: an in-place, unstable, bucket 15 | -- sort. Also in contrast to radix sort, the values are inspected in a big 16 | -- endian order, and buckets are sorted via recursive splitting. This, 17 | -- however, makes it sensible for sorting strings in lexicographic order 18 | -- (provided indexing is fast). 19 | -- 20 | -- The algorithm works as follows: at each stage, the array is looped over, 21 | -- counting the number of elements for each bucket. Then, starting at the 22 | -- beginning of the array, elements are permuted in place to reside in the 23 | -- proper bucket, following chains until they reach back to the current 24 | -- base index. Finally, each bucket is sorted recursively. This lends itself 25 | -- well to the aforementioned variable-length strings, and so the algorithm 26 | -- takes a stopping predicate, which is given a representative of the stripe, 27 | -- rather than running for a set number of iterations. 28 | 29 | module Data.Vector.Algorithms.AmericanFlag ( sort 30 | , sortUniq 31 | , sortBy 32 | , sortUniqBy 33 | , terminate 34 | , Lexicographic(..) 35 | ) where 36 | 37 | import Prelude hiding (read, length) 38 | 39 | import Control.Monad 40 | import Control.Monad.Primitive 41 | 42 | import Data.Proxy 43 | 44 | import Data.Word 45 | import Data.Int 46 | import Data.Bits 47 | 48 | import qualified Data.ByteString as B 49 | 50 | import Data.Vector.Generic.Mutable 51 | import qualified Data.Vector.Primitive.Mutable as PV 52 | 53 | import qualified Data.Vector.Unboxed.Mutable as U 54 | 55 | import Data.Vector.Algorithms.Common 56 | 57 | import qualified Data.Vector.Algorithms.Insertion as I 58 | 59 | import Foreign.Storable 60 | 61 | -- | The methods of this class specify the information necessary to sort 62 | -- arrays using the default ordering. The name 'Lexicographic' is meant 63 | -- to convey that index should return results in a similar way to indexing 64 | -- into a string. 65 | class Lexicographic e where 66 | -- | Computes the length of a representative of a stripe. It should take 'n' 67 | -- passes to sort values of extent 'n'. The extent may not be uniform across 68 | -- all values of the type. 69 | extent :: e -> Int 70 | 71 | -- | The size of the bucket array necessary for sorting es 72 | size :: Proxy e -> Int 73 | -- | Determines which bucket a given element should inhabit for a 74 | -- particular iteration. 75 | index :: Int -> e -> Int 76 | 77 | instance Lexicographic Word8 where 78 | extent _ = 1 79 | {-# INLINE extent #-} 80 | size _ = 256 81 | {-# INLINE size #-} 82 | index _ n = fromIntegral n 83 | {-# INLINE index #-} 84 | 85 | instance Lexicographic Word16 where 86 | extent _ = 2 87 | {-# INLINE extent #-} 88 | size _ = 256 89 | {-# INLINE size #-} 90 | index 0 n = fromIntegral $ (n `shiftR` 8) .&. 255 91 | index 1 n = fromIntegral $ n .&. 255 92 | index _ _ = 0 93 | {-# INLINE index #-} 94 | 95 | instance Lexicographic Word32 where 96 | extent _ = 4 97 | {-# INLINE extent #-} 98 | size _ = 256 99 | {-# INLINE size #-} 100 | index 0 n = fromIntegral $ (n `shiftR` 24) .&. 255 101 | index 1 n = fromIntegral $ (n `shiftR` 16) .&. 255 102 | index 2 n = fromIntegral $ (n `shiftR` 8) .&. 255 103 | index 3 n = fromIntegral $ n .&. 255 104 | index _ _ = 0 105 | {-# INLINE index #-} 106 | 107 | instance Lexicographic Word64 where 108 | extent _ = 8 109 | {-# INLINE extent #-} 110 | size _ = 256 111 | {-# INLINE size #-} 112 | index 0 n = fromIntegral $ (n `shiftR` 56) .&. 255 113 | index 1 n = fromIntegral $ (n `shiftR` 48) .&. 255 114 | index 2 n = fromIntegral $ (n `shiftR` 40) .&. 255 115 | index 3 n = fromIntegral $ (n `shiftR` 32) .&. 255 116 | index 4 n = fromIntegral $ (n `shiftR` 24) .&. 255 117 | index 5 n = fromIntegral $ (n `shiftR` 16) .&. 255 118 | index 6 n = fromIntegral $ (n `shiftR` 8) .&. 255 119 | index 7 n = fromIntegral $ n .&. 255 120 | index _ _ = 0 121 | {-# INLINE index #-} 122 | 123 | instance Lexicographic Word where 124 | extent _ = sizeOf (0 :: Word) 125 | {-# INLINE extent #-} 126 | size _ = 256 127 | {-# INLINE size #-} 128 | index 0 n = fromIntegral $ (n `shiftR` 56) .&. 255 129 | index 1 n = fromIntegral $ (n `shiftR` 48) .&. 255 130 | index 2 n = fromIntegral $ (n `shiftR` 40) .&. 255 131 | index 3 n = fromIntegral $ (n `shiftR` 32) .&. 255 132 | index 4 n = fromIntegral $ (n `shiftR` 24) .&. 255 133 | index 5 n = fromIntegral $ (n `shiftR` 16) .&. 255 134 | index 6 n = fromIntegral $ (n `shiftR` 8) .&. 255 135 | index 7 n = fromIntegral $ n .&. 255 136 | index _ _ = 0 137 | {-# INLINE index #-} 138 | 139 | instance Lexicographic Int8 where 140 | extent _ = 1 141 | {-# INLINE extent #-} 142 | size _ = 256 143 | {-# INLINE size #-} 144 | index _ n = 255 .&. fromIntegral n `xor` 128 145 | {-# INLINE index #-} 146 | 147 | instance Lexicographic Int16 where 148 | extent _ = 2 149 | {-# INLINE extent #-} 150 | size _ = 256 151 | {-# INLINE size #-} 152 | index 0 n = fromIntegral $ ((n `xor` minBound) `shiftR` 8) .&. 255 153 | index 1 n = fromIntegral $ n .&. 255 154 | index _ _ = 0 155 | {-# INLINE index #-} 156 | 157 | instance Lexicographic Int32 where 158 | extent _ = 4 159 | {-# INLINE extent #-} 160 | size _ = 256 161 | {-# INLINE size #-} 162 | index 0 n = fromIntegral $ ((n `xor` minBound) `shiftR` 24) .&. 255 163 | index 1 n = fromIntegral $ (n `shiftR` 16) .&. 255 164 | index 2 n = fromIntegral $ (n `shiftR` 8) .&. 255 165 | index 3 n = fromIntegral $ n .&. 255 166 | index _ _ = 0 167 | {-# INLINE index #-} 168 | 169 | instance Lexicographic Int64 where 170 | extent _ = 8 171 | {-# INLINE extent #-} 172 | size _ = 256 173 | {-# INLINE size #-} 174 | index 0 n = fromIntegral $ ((n `xor` minBound) `shiftR` 56) .&. 255 175 | index 1 n = fromIntegral $ (n `shiftR` 48) .&. 255 176 | index 2 n = fromIntegral $ (n `shiftR` 40) .&. 255 177 | index 3 n = fromIntegral $ (n `shiftR` 32) .&. 255 178 | index 4 n = fromIntegral $ (n `shiftR` 24) .&. 255 179 | index 5 n = fromIntegral $ (n `shiftR` 16) .&. 255 180 | index 6 n = fromIntegral $ (n `shiftR` 8) .&. 255 181 | index 7 n = fromIntegral $ n .&. 255 182 | index _ _ = 0 183 | {-# INLINE index #-} 184 | 185 | instance Lexicographic Int where 186 | extent _ = sizeOf (0 :: Int) 187 | {-# INLINE extent #-} 188 | size _ = 256 189 | {-# INLINE size #-} 190 | index 0 n = ((n `xor` minBound) `shiftR` 56) .&. 255 191 | index 1 n = (n `shiftR` 48) .&. 255 192 | index 2 n = (n `shiftR` 40) .&. 255 193 | index 3 n = (n `shiftR` 32) .&. 255 194 | index 4 n = (n `shiftR` 24) .&. 255 195 | index 5 n = (n `shiftR` 16) .&. 255 196 | index 6 n = (n `shiftR` 8) .&. 255 197 | index 7 n = n .&. 255 198 | index _ _ = 0 199 | {-# INLINE index #-} 200 | 201 | instance Lexicographic B.ByteString where 202 | extent = B.length 203 | {-# INLINE extent #-} 204 | size _ = 257 205 | {-# INLINE size #-} 206 | index i b 207 | | i >= B.length b = 0 208 | | otherwise = fromIntegral (B.index b i) + 1 209 | {-# INLINE index #-} 210 | 211 | instance (Lexicographic a, Lexicographic b) => Lexicographic (a, b) where 212 | extent (a,b) = extent a + extent b 213 | {-# INLINE extent #-} 214 | size _ = size (Proxy :: Proxy a) `max` size (Proxy :: Proxy b) 215 | {-# INLINE size #-} 216 | index i (a,b) 217 | | i >= extent a = index i b 218 | | otherwise = index i a 219 | {-# INLINE index #-} 220 | 221 | instance (Lexicographic a, Lexicographic b) => Lexicographic (Either a b) where 222 | extent (Left a) = 1 + extent a 223 | extent (Right b) = 1 + extent b 224 | {-# INLINE extent #-} 225 | size _ = size (Proxy :: Proxy a) `max` size (Proxy :: Proxy b) 226 | {-# INLINE size #-} 227 | index 0 (Left _) = 0 228 | index 0 (Right _) = 1 229 | index n (Left a) = index (n-1) a 230 | index n (Right b) = index (n-1) b 231 | {-# INLINE index #-} 232 | 233 | -- | Given a representative of a stripe and an index number, this 234 | -- function determines whether to stop sorting. 235 | terminate :: Lexicographic e => e -> Int -> Bool 236 | terminate e i = i >= extent e 237 | {-# INLINE terminate #-} 238 | 239 | -- | Sorts an array using the default ordering. Both Lexicographic and 240 | -- Ord are necessary because the algorithm falls back to insertion sort 241 | -- for sufficiently small arrays. 242 | sort :: forall e m v. (PrimMonad m, MVector v e, Lexicographic e, Ord e) 243 | => v (PrimState m) e -> m () 244 | sort v = sortBy compare terminate (size p) index v 245 | where p :: Proxy e 246 | p = Proxy 247 | {-# INLINE sort #-} 248 | 249 | -- | A variant on `sort` that returns a vector of unique elements. 250 | sortUniq :: forall e m v. (PrimMonad m, MVector v e, Lexicographic e, Ord e) 251 | => v (PrimState m) e -> m (v (PrimState m) e) 252 | sortUniq v = sortUniqBy compare terminate (size p) index v 253 | where p :: Proxy e 254 | p = Proxy 255 | {-# INLINE sortUniq #-} 256 | 257 | -- | A fully parameterized version of the sorting algorithm. Again, this 258 | -- function takes both radix information and a comparison, because the 259 | -- algorithms falls back to insertion sort for small arrays. 260 | sortBy :: (PrimMonad m, MVector v e) 261 | => Comparison e -- ^ a comparison for the insertion sort flalback 262 | -> (e -> Int -> Bool) -- ^ determines whether a stripe is complete 263 | -> Int -- ^ the number of buckets necessary 264 | -> (Int -> e -> Int) -- ^ the big-endian radix function 265 | -> v (PrimState m) e -- ^ the array to be sorted 266 | -> m () 267 | sortBy cmp stop buckets radix v 268 | | length v == 0 = return () 269 | | otherwise = do count <- new buckets 270 | pile <- new buckets 271 | countLoop (radix 0) v count 272 | flagLoop cmp stop radix count pile v 273 | {-# INLINE sortBy #-} 274 | 275 | -- | A variant on `sortBy` which returns a vector of unique elements. 276 | sortUniqBy :: (PrimMonad m, MVector v e) 277 | => Comparison e -- ^ a comparison for the insertion sort flalback 278 | -> (e -> Int -> Bool) -- ^ determines whether a stripe is complete 279 | -> Int -- ^ the number of buckets necessary 280 | -> (Int -> e -> Int) -- ^ the big-endian radix function 281 | -> v (PrimState m) e -- ^ the array to be sorted 282 | -> m (v (PrimState m) e) 283 | sortUniqBy cmp stop buckets radix v 284 | | length v == 0 = return v 285 | | otherwise = do count <- new buckets 286 | pile <- new buckets 287 | countLoop (radix 0) v count 288 | flagLoop cmp stop radix count pile v 289 | uniqueMutableBy cmp v 290 | {-# INLINE sortUniqBy #-} 291 | 292 | flagLoop :: (PrimMonad m, MVector v e) 293 | => Comparison e 294 | -> (e -> Int -> Bool) -- number of passes 295 | -> (Int -> e -> Int) -- radix function 296 | -> PV.MVector (PrimState m) Int -- auxiliary count array 297 | -> PV.MVector (PrimState m) Int -- auxiliary pile array 298 | -> v (PrimState m) e -- source array 299 | -> m () 300 | flagLoop cmp stop radix count pile v = go 0 v 301 | where 302 | 303 | go pass v = do e <- unsafeRead v 0 304 | unless (stop e $ pass - 1) $ go' pass v 305 | 306 | go' pass v 307 | | len < threshold = I.sortByBounds cmp v 0 len 308 | | otherwise = do accumulate count pile 309 | permute (radix pass) count pile v 310 | recurse 0 311 | where 312 | len = length v 313 | ppass = pass + 1 314 | 315 | recurse i 316 | | i < len = do j <- countStripe (radix ppass) (radix pass) count v i 317 | go ppass (unsafeSlice i (j - i) v) 318 | recurse j 319 | | otherwise = return () 320 | {-# INLINE flagLoop #-} 321 | 322 | accumulate :: (PrimMonad m) 323 | => PV.MVector (PrimState m) Int 324 | -> PV.MVector (PrimState m) Int 325 | -> m () 326 | accumulate count pile = loop 0 0 327 | where 328 | len = length count 329 | 330 | loop i acc 331 | | i < len = do ci <- unsafeRead count i 332 | let acc' = acc + ci 333 | unsafeWrite pile i acc 334 | unsafeWrite count i acc' 335 | loop (i+1) acc' 336 | | otherwise = return () 337 | {-# INLINE accumulate #-} 338 | 339 | permute :: (PrimMonad m, MVector v e) 340 | => (e -> Int) -- radix function 341 | -> PV.MVector (PrimState m) Int -- count array 342 | -> PV.MVector (PrimState m) Int -- pile array 343 | -> v (PrimState m) e -- source array 344 | -> m () 345 | permute rdx count pile v = go 0 346 | where 347 | len = length v 348 | 349 | go i 350 | | i < len = do e <- unsafeRead v i 351 | let r = rdx e 352 | p <- unsafeRead pile r 353 | m <- if r > 0 354 | then unsafeRead count (r-1) 355 | else return 0 356 | case () of 357 | -- if the current element is already in the right pile, 358 | -- go to the end of the pile 359 | _ | m <= i && i < p -> go p 360 | -- if the current element happens to be in the right 361 | -- pile, bump the pile counter and go to the next element 362 | | i == p -> unsafeWrite pile r (p+1) >> go (i+1) 363 | -- otherwise follow the chain 364 | | otherwise -> follow i e p >> go (i+1) 365 | | otherwise = return () 366 | 367 | follow i e j = do en <- unsafeRead v j 368 | let r = rdx en 369 | p <- inc pile r 370 | if p == j 371 | -- if the target happens to be in the right pile, don't move it. 372 | then follow i e (j+1) 373 | else unsafeWrite v j e >> if i == p 374 | then unsafeWrite v i en 375 | else follow i en p 376 | {-# INLINE permute #-} 377 | 378 | countStripe :: (PrimMonad m, MVector v e) 379 | => (e -> Int) -- radix function 380 | -> (e -> Int) -- stripe function 381 | -> PV.MVector (PrimState m) Int -- count array 382 | -> v (PrimState m) e -- source array 383 | -> Int -- starting position 384 | -> m Int -- end of stripe: [lo,hi) 385 | countStripe rdx str count v lo = do set count 0 386 | e <- unsafeRead v lo 387 | go (str e) e (lo+1) 388 | where 389 | len = length v 390 | 391 | go !s e i = inc count (rdx e) >> 392 | if i < len 393 | then do en <- unsafeRead v i 394 | if str en == s 395 | then go s en (i+1) 396 | else return i 397 | else return len 398 | {-# INLINE countStripe #-} 399 | 400 | threshold :: Int 401 | threshold = 25 402 | 403 | -------------------------------------------------------------------------------- /src/Data/Vector/Algorithms/Combinators.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE Rank2Types, TypeOperators #-} 2 | 3 | -- --------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Data.Vector.Algorithms.Combinators 6 | -- Copyright : (c) 2008-2010 Dan Doel 7 | -- Maintainer : Dan Doel 8 | -- Stability : Experimental 9 | -- Portability : Non-portable (rank-2 types) 10 | -- 11 | -- The purpose of this module is to supply various combinators for commonly 12 | -- used idioms for the algorithms in this package. Examples at the time of 13 | -- this writing include running an algorithm keyed on some function of the 14 | -- elements (but only computing said function once per element), and safely 15 | -- applying the algorithms on mutable arrays to immutable arrays. 16 | 17 | module Data.Vector.Algorithms.Combinators 18 | ( 19 | -- , usingKeys 20 | -- , usingIxKeys 21 | ) where 22 | 23 | import Prelude hiding (length) 24 | 25 | import Control.Monad.ST 26 | 27 | import Data.Ord 28 | 29 | import Data.Vector.Generic 30 | 31 | import qualified Data.Vector.Generic.Mutable as M 32 | import qualified Data.Vector.Generic.New as N 33 | 34 | {- 35 | -- | Uses a function to compute a key for each element which the 36 | -- algorithm should use in lieu of the actual element. For instance: 37 | -- 38 | -- > usingKeys sortBy f arr 39 | -- 40 | -- should produce the same results as: 41 | -- 42 | -- > sortBy (comparing f) arr 43 | -- 44 | -- the difference being that usingKeys computes each key only once 45 | -- which can be more efficient for expensive key functions. 46 | usingKeys :: (UA e, UA k, Ord k) 47 | => (forall e'. (UA e') => Comparison e' -> MUArr e' s -> ST s ()) 48 | -> (e -> k) 49 | -> MUArr e s 50 | -> ST s () 51 | usingKeys algo f arr = usingIxKeys algo (const f) arr 52 | {-# INLINE usingKeys #-} 53 | 54 | -- | As usingKeys, only the key function has access to the array index 55 | -- at which each element is stored. 56 | usingIxKeys :: (UA e, UA k, Ord k) 57 | => (forall e'. (UA e') => Comparison e' -> MUArr e' s -> ST s ()) 58 | -> (Int -> e -> k) 59 | -> MUArr e s 60 | -> ST s () 61 | usingIxKeys algo f arr = do 62 | keys <- newMU (lengthMU arr) 63 | fill len keys 64 | algo (comparing fstS) (unsafeZipMU keys arr) 65 | where 66 | len = lengthMU arr 67 | fill k keys 68 | | k < 0 = return () 69 | | otherwise = readMU arr k >>= writeMU keys k . f k >> fill (k-1) keys 70 | {-# INLINE usingIxKeys #-} 71 | -} 72 | -------------------------------------------------------------------------------- /src/Data/Vector/Algorithms/Common.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | {-# LANGUAGE TypeFamilies #-} 3 | {-# LANGUAGE BangPatterns #-} 4 | {-# LANGUAGE ScopedTypeVariables #-} 5 | 6 | -- --------------------------------------------------------------------------- 7 | -- | 8 | -- Module : Data.Vector.Algorithms.Common 9 | -- Copyright : (c) 2008-2011 Dan Doel 10 | -- Maintainer : Dan Doel 11 | -- Stability : Experimental 12 | -- Portability : Portable 13 | -- 14 | -- Common operations and utility functions for all sorts 15 | 16 | module Data.Vector.Algorithms.Common 17 | ( type Comparison 18 | , copyOffset 19 | , inc 20 | , countLoop 21 | , midPoint 22 | , uniqueMutableBy 23 | ) 24 | where 25 | 26 | import Prelude hiding (read, length) 27 | 28 | import Control.Monad.Primitive 29 | 30 | import Data.Vector.Generic.Mutable 31 | import Data.Word (Word) 32 | 33 | import qualified Data.Vector.Primitive.Mutable as PV 34 | 35 | -- | A type of comparisons between two values of a given type. 36 | type Comparison e = e -> e -> Ordering 37 | 38 | copyOffset :: (PrimMonad m, MVector v e) 39 | => v (PrimState m) e -> v (PrimState m) e -> Int -> Int -> Int -> m () 40 | copyOffset from to iFrom iTo len = 41 | unsafeCopy (unsafeSlice iTo len to) (unsafeSlice iFrom len from) 42 | {-# INLINE copyOffset #-} 43 | 44 | inc :: (PrimMonad m, MVector v Int) => v (PrimState m) Int -> Int -> m Int 45 | inc arr i = unsafeRead arr i >>= \e -> unsafeWrite arr i (e+1) >> return e 46 | {-# INLINE inc #-} 47 | 48 | -- shared bucket sorting stuff 49 | countLoop :: (PrimMonad m, MVector v e) 50 | => (e -> Int) 51 | -> v (PrimState m) e -> PV.MVector (PrimState m) Int -> m () 52 | countLoop rdx src count = set count 0 >> go 0 53 | where 54 | len = length src 55 | go i 56 | | i < len = unsafeRead src i >>= inc count . rdx >> go (i+1) 57 | | otherwise = return () 58 | {-# INLINE countLoop #-} 59 | 60 | midPoint :: Int -> Int -> Int 61 | midPoint a b = 62 | toInt $ (toWord a + toWord b) `div` 2 63 | where 64 | toWord :: Int -> Word 65 | toWord = fromIntegral 66 | 67 | toInt :: Word -> Int 68 | toInt = fromIntegral 69 | {-# INLINE midPoint #-} 70 | 71 | -- Adapted from Andrew Martin's uniquqMutable in the primitive-sort package 72 | uniqueMutableBy :: forall m v a . (PrimMonad m, MVector v a) 73 | => Comparison a -> v (PrimState m) a -> m (v (PrimState m) a) 74 | uniqueMutableBy cmp mv = do 75 | let !len = basicLength mv 76 | if len > 1 77 | then do 78 | !a0 <- unsafeRead mv 0 79 | let findFirstDuplicate :: a -> Int -> m Int 80 | findFirstDuplicate !prev !ix = if ix < len 81 | then do 82 | a <- unsafeRead mv ix 83 | if cmp a prev == EQ 84 | then return ix 85 | else findFirstDuplicate a (ix + 1) 86 | else return ix 87 | dupIx <- findFirstDuplicate a0 1 88 | if dupIx == len 89 | then return mv 90 | else do 91 | let deduplicate :: a -> Int -> Int -> m Int 92 | deduplicate !prev !srcIx !dstIx = if srcIx < len 93 | then do 94 | a <- unsafeRead mv srcIx 95 | if cmp a prev == EQ 96 | then deduplicate a (srcIx + 1) dstIx 97 | else do 98 | unsafeWrite mv dstIx a 99 | deduplicate a (srcIx + 1) (dstIx + 1) 100 | else return dstIx 101 | !a <- unsafeRead mv dupIx 102 | !reducedLen <- deduplicate a (dupIx + 1) dupIx 103 | resizeVector mv reducedLen 104 | else return mv 105 | {-# INLINABLE uniqueMutableBy #-} 106 | 107 | -- Used internally in uniqueMutableBy: copies the elements of a vector to one 108 | -- of a smaller size. 109 | resizeVector 110 | :: (MVector v a, PrimMonad m) 111 | => v (PrimState m) a -> Int -> m (v (PrimState m) a) 112 | resizeVector !src !sz = do 113 | dst <- unsafeNew sz 114 | copyToSmaller dst src 115 | pure dst 116 | {-# inline resizeVector #-} 117 | 118 | -- Used internally in resizeVector: copy a vector from a larger to 119 | -- smaller vector. Should not be used if the source vector 120 | -- is smaller than the target vector. 121 | copyToSmaller 122 | :: (MVector v a, PrimMonad m) 123 | => v (PrimState m) a -> v (PrimState m) a -> m () 124 | copyToSmaller !dst !src = stToPrim $ do_copy 0 125 | where 126 | !n = basicLength dst 127 | 128 | do_copy i | i < n = do 129 | x <- basicUnsafeRead src i 130 | basicUnsafeWrite dst i x 131 | do_copy (i+1) 132 | | otherwise = return () 133 | -------------------------------------------------------------------------------- /src/Data/Vector/Algorithms/Heap.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TypeFamilies #-} 2 | {-# LANGUAGE TypeOperators #-} 3 | 4 | -- --------------------------------------------------------------------------- 5 | -- | 6 | -- Module : Data.Vector.Algorithms.Heap 7 | -- Copyright : (c) 2008-2015 Dan Doel 8 | -- Maintainer : Dan Doel 9 | -- Stability : Experimental 10 | -- Portability : Non-portable (type operators) 11 | -- 12 | -- This module implements operations for working with a quaternary heap stored 13 | -- in an unboxed array. Most heapsorts are defined in terms of a binary heap, 14 | -- in which each internal node has at most two children. By contrast, a 15 | -- quaternary heap has internal nodes with up to four children. This reduces 16 | -- the number of comparisons in a heapsort slightly, and improves locality 17 | -- (again, slightly) by flattening out the heap. 18 | 19 | module Data.Vector.Algorithms.Heap 20 | ( -- * Sorting 21 | sort 22 | , sortUniq 23 | , sortBy 24 | , sortUniqBy 25 | , sortByBounds 26 | -- * Selection 27 | , select 28 | , selectBy 29 | , selectByBounds 30 | -- * Partial sorts 31 | , partialSort 32 | , partialSortBy 33 | , partialSortByBounds 34 | -- * Heap operations 35 | , heapify 36 | , pop 37 | , popTo 38 | , sortHeap 39 | , heapInsert 40 | , Comparison 41 | ) where 42 | 43 | import Prelude hiding (read, length) 44 | 45 | import Control.Monad 46 | import Control.Monad.Primitive 47 | 48 | import Data.Bits 49 | 50 | import Data.Vector.Generic.Mutable 51 | 52 | import Data.Vector.Algorithms.Common (Comparison, uniqueMutableBy) 53 | 54 | import qualified Data.Vector.Algorithms.Optimal as O 55 | 56 | -- | Sorts an entire array using the default ordering. 57 | sort :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> m () 58 | sort = sortBy compare 59 | {-# INLINE sort #-} 60 | 61 | -- | A variant on `sort` that returns a vector of unique elements. 62 | sortUniq :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> m (v (PrimState m) e) 63 | sortUniq = sortUniqBy compare 64 | {-# INLINE sortUniq #-} 65 | 66 | -- | Sorts an entire array using a custom ordering. 67 | sortBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> m () 68 | sortBy cmp a = sortByBounds cmp a 0 (length a) 69 | {-# INLINE sortBy #-} 70 | 71 | -- | A variant on `sortBy` which returns a vector of unique elements. 72 | sortUniqBy :: (PrimMonad m, MVector v e) 73 | => Comparison e -> v (PrimState m) e -> m (v (PrimState m) e) 74 | sortUniqBy cmp a = do 75 | sortByBounds cmp a 0 (length a) 76 | uniqueMutableBy cmp a 77 | {-# INLINE sortUniqBy #-} 78 | 79 | -- | Sorts a portion of an array [l,u) using a custom ordering 80 | sortByBounds 81 | :: (PrimMonad m, MVector v e) 82 | => Comparison e 83 | -> v (PrimState m) e 84 | -> Int -- ^ lower index, l 85 | -> Int -- ^ upper index, u 86 | -> m () 87 | sortByBounds cmp a l u 88 | | len < 2 = return () 89 | | len == 2 = O.sort2ByOffset cmp a l 90 | | len == 3 = O.sort3ByOffset cmp a l 91 | | len == 4 = O.sort4ByOffset cmp a l 92 | | otherwise = heapify cmp a l u >> sortHeap cmp a l (l+4) u >> O.sort4ByOffset cmp a l 93 | where len = u - l 94 | {-# INLINE sortByBounds #-} 95 | 96 | -- | Moves the lowest k elements to the front of the array. 97 | -- The elements will be in no particular order. 98 | select 99 | :: (PrimMonad m, MVector v e, Ord e) 100 | => v (PrimState m) e 101 | -> Int -- ^ number of elements to select, k 102 | -> m () 103 | select = selectBy compare 104 | {-# INLINE select #-} 105 | 106 | -- | Moves the lowest (as defined by the comparison) k elements 107 | -- to the front of the array. The elements will be in no particular 108 | -- order. 109 | selectBy 110 | :: (PrimMonad m, MVector v e) 111 | => Comparison e 112 | -> v (PrimState m) e 113 | -> Int -- ^ number of elements to select, k 114 | -> m () 115 | selectBy cmp a k = selectByBounds cmp a k 0 (length a) 116 | {-# INLINE selectBy #-} 117 | 118 | -- | Moves the 'lowest' k elements in the portion [l,u) of the 119 | -- array into the positions [l,k+l). The elements will be in 120 | -- no particular order. 121 | selectByBounds 122 | :: (PrimMonad m, MVector v e) 123 | => Comparison e 124 | -> v (PrimState m) e 125 | -> Int -- ^ number of elements to select, k 126 | -> Int -- ^ lower index, l 127 | -> Int -- ^ upper index, u 128 | -> m () 129 | selectByBounds cmp a k l u 130 | | l + k <= u = heapify cmp a l (l + k) >> go l (l + k) (u - 1) 131 | | otherwise = return () 132 | where 133 | go l m u 134 | | u < m = return () 135 | | otherwise = do el <- unsafeRead a l 136 | eu <- unsafeRead a u 137 | case cmp eu el of 138 | LT -> popTo cmp a l m u 139 | _ -> return () 140 | go l m (u - 1) 141 | {-# INLINE selectByBounds #-} 142 | 143 | -- | Moves the lowest k elements to the front of the array, sorted. 144 | -- 145 | -- The remaining values of the array will be in no particular order. 146 | partialSort 147 | :: (PrimMonad m, MVector v e, Ord e) 148 | => v (PrimState m) e 149 | -> Int -- ^ number of elements to sort, k 150 | -> m () 151 | partialSort = partialSortBy compare 152 | {-# INLINE partialSort #-} 153 | 154 | -- | Moves the lowest k elements (as defined by the comparison) to 155 | -- the front of the array, sorted. 156 | -- 157 | -- The remaining values of the array will be in no particular order. 158 | partialSortBy 159 | :: (PrimMonad m, MVector v e) 160 | => Comparison e 161 | -> v (PrimState m) e 162 | -> Int -- ^ number of elements to sort, k 163 | -> m () 164 | partialSortBy cmp a k = partialSortByBounds cmp a k 0 (length a) 165 | {-# INLINE partialSortBy #-} 166 | 167 | -- | Moves the lowest k elements in the portion [l,u) of the array 168 | -- into positions [l,k+l), sorted. 169 | -- 170 | -- The remaining values in [l,u) will be in no particular order. Values outside 171 | -- the range [l,u) will be unaffected. 172 | partialSortByBounds 173 | :: (PrimMonad m, MVector v e) 174 | => Comparison e 175 | -> v (PrimState m) e 176 | -> Int -- ^ number of elements to sort, k 177 | -> Int -- ^ lower index, l 178 | -> Int -- ^ upper index, u 179 | -> m () 180 | partialSortByBounds cmp a k l u 181 | -- this potentially does more work than absolutely required, 182 | -- but using a heap to find the least 2 of 4 elements 183 | -- seems unlikely to be better than just sorting all of them 184 | -- with an optimal sort, and the latter is obviously index 185 | -- correct. 186 | | len < 2 = return () 187 | | len == 2 = O.sort2ByOffset cmp a l 188 | | len == 3 = O.sort3ByOffset cmp a l 189 | | len == 4 = O.sort4ByOffset cmp a l 190 | | u <= l + k = sortByBounds cmp a l u 191 | | otherwise = do selectByBounds cmp a (k + 1) l u 192 | sortHeap cmp a l (l + 4) (l + k + 1) 193 | O.sort4ByOffset cmp a l 194 | where 195 | len = u - l 196 | {-# INLINE partialSortByBounds #-} 197 | 198 | -- | Constructs a heap in a portion of an array [l, u), using the values therein. 199 | -- 200 | -- Note: 'heapify' is more efficient than constructing a heap by repeated 201 | -- insertion. Repeated insertion has complexity O(n*log n) while 'heapify' is able 202 | -- to construct a heap in O(n), where n is the number of elements in the heap. 203 | heapify 204 | :: (PrimMonad m, MVector v e) 205 | => Comparison e 206 | -> v (PrimState m) e 207 | -> Int -- ^ lower index, l 208 | -> Int -- ^ upper index, u 209 | -> m () 210 | heapify cmp a l u = loop $ (len - 1) `shiftR` 2 211 | where 212 | len = u - l 213 | loop k 214 | | k < 0 = return () 215 | | otherwise = unsafeRead a (l+k) >>= \e -> 216 | siftByOffset cmp a e l k len >> loop (k - 1) 217 | {-# INLINE heapify #-} 218 | 219 | -- | Given a heap stored in a portion of an array [l,u), swaps the 220 | -- top of the heap with the element at u and rebuilds the heap. 221 | pop 222 | :: (PrimMonad m, MVector v e) 223 | => Comparison e 224 | -> v (PrimState m) e 225 | -> Int -- ^ lower heap index, l 226 | -> Int -- ^ upper heap index, u 227 | -> m () 228 | pop cmp a l u = popTo cmp a l u u 229 | {-# INLINE pop #-} 230 | 231 | -- | Given a heap stored in a portion of an array [l,u) swaps the top 232 | -- of the heap with the element at position t, and rebuilds the heap. 233 | popTo 234 | :: (PrimMonad m, MVector v e) 235 | => Comparison e 236 | -> v (PrimState m) e 237 | -> Int -- ^ lower heap index, l 238 | -> Int -- ^ upper heap index, u 239 | -> Int -- ^ index to pop to, t 240 | -> m () 241 | popTo cmp a l u t = do al <- unsafeRead a l 242 | at <- unsafeRead a t 243 | unsafeWrite a t al 244 | siftByOffset cmp a at l 0 (u - l) 245 | {-# INLINE popTo #-} 246 | 247 | -- | Given a heap stored in a portion of an array [l,u), sorts the 248 | -- highest values into [m,u). The elements in [l,m) are not in any 249 | -- particular order. 250 | sortHeap 251 | :: (PrimMonad m, MVector v e) 252 | => Comparison e 253 | -> v (PrimState m) e 254 | -> Int -- ^ lower heap index, l 255 | -> Int -- ^ lower bound of final sorted portion, m 256 | -> Int -- ^ upper heap index, u 257 | -> m () 258 | sortHeap cmp a l m u = loop (u-1) >> unsafeSwap a l m 259 | where 260 | loop k 261 | | m < k = pop cmp a l k >> loop (k-1) 262 | | otherwise = return () 263 | {-# INLINE sortHeap #-} 264 | 265 | -- | Given a heap stored in a portion of an array [l,u) and an element e, 266 | -- inserts the element into the heap, resulting in a heap in [l,u]. 267 | -- 268 | -- Note: it is best to only use this operation when incremental construction of 269 | -- a heap is required. 'heapify' is capable of building a heap in O(n) time, 270 | -- while repeated insertion takes O(n*log n) time. 271 | heapInsert 272 | :: (PrimMonad m, MVector v e) 273 | => Comparison e 274 | -> v (PrimState m) e 275 | -> Int -- ^ lower heap index, l 276 | -> Int -- ^ upper heap index, u 277 | -> e -- ^ element to be inserted, e 278 | -> m () 279 | heapInsert cmp v l u e = sift (u - l) 280 | where 281 | sift k 282 | | k <= 0 = unsafeWrite v l e 283 | | otherwise = let pi = shiftR (k-1) 2 284 | in unsafeRead v (l + pi) >>= \p -> case cmp p e of 285 | LT -> unsafeWrite v (l + k) p >> sift pi 286 | _ -> unsafeWrite v (l + k) e 287 | {-# INLINE heapInsert #-} 288 | 289 | -- Rebuilds a heap with a hole in it from start downwards. Afterward, 290 | -- the heap property should apply for [start + off, len + off). val 291 | -- is the new value to be put in the hole. 292 | siftByOffset :: (PrimMonad m, MVector v e) 293 | => Comparison e -> v (PrimState m) e -> e -> Int -> Int -> Int -> m () 294 | siftByOffset cmp a val off start len = sift val start len 295 | where 296 | sift val root len 297 | | child < len = do (child', ac) <- maximumChild cmp a off child len 298 | case cmp val ac of 299 | LT -> unsafeWrite a (root + off) ac >> sift val child' len 300 | _ -> unsafeWrite a (root + off) val 301 | | otherwise = unsafeWrite a (root + off) val 302 | where child = root `shiftL` 2 + 1 303 | {-# INLINE siftByOffset #-} 304 | 305 | -- Finds the maximum child of a heap node, given the indx of the first child. 306 | maximumChild :: (PrimMonad m, MVector v e) 307 | => Comparison e -> v (PrimState m) e -> Int -> Int -> Int -> m (Int, e) 308 | maximumChild cmp a off child1 len 309 | | child4 < len = do ac1 <- unsafeRead a (child1 + off) 310 | ac2 <- unsafeRead a (child2 + off) 311 | ac3 <- unsafeRead a (child3 + off) 312 | ac4 <- unsafeRead a (child4 + off) 313 | return $ case cmp ac1 ac2 of 314 | LT -> case cmp ac2 ac3 of 315 | LT -> case cmp ac3 ac4 of 316 | LT -> (child4, ac4) 317 | _ -> (child3, ac3) 318 | _ -> case cmp ac2 ac4 of 319 | LT -> (child4, ac4) 320 | _ -> (child2, ac2) 321 | _ -> case cmp ac1 ac3 of 322 | LT -> case cmp ac3 ac4 of 323 | LT -> (child4, ac4) 324 | _ -> (child3, ac3) 325 | _ -> case cmp ac1 ac4 of 326 | LT -> (child4, ac4) 327 | _ -> (child1, ac1) 328 | | child3 < len = do ac1 <- unsafeRead a (child1 + off) 329 | ac2 <- unsafeRead a (child2 + off) 330 | ac3 <- unsafeRead a (child3 + off) 331 | return $ case cmp ac1 ac2 of 332 | LT -> case cmp ac2 ac3 of 333 | LT -> (child3, ac3) 334 | _ -> (child2, ac2) 335 | _ -> case cmp ac1 ac3 of 336 | LT -> (child3, ac3) 337 | _ -> (child1, ac1) 338 | | child2 < len = do ac1 <- unsafeRead a (child1 + off) 339 | ac2 <- unsafeRead a (child2 + off) 340 | return $ case cmp ac1 ac2 of 341 | LT -> (child2, ac2) 342 | _ -> (child1, ac1) 343 | | otherwise = do ac1 <- unsafeRead a (child1 + off) ; return (child1, ac1) 344 | where 345 | child2 = child1 + 1 346 | child3 = child1 + 2 347 | child4 = child1 + 3 348 | {-# INLINE maximumChild #-} 349 | -------------------------------------------------------------------------------- /src/Data/Vector/Algorithms/Insertion.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TypeFamilies #-} 2 | 3 | -- --------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Data.Vector.Algorithms.Insertion 6 | -- Copyright : (c) 2008-2010 Dan Doel 7 | -- Maintainer : Dan Doel 8 | -- Stability : Experimental 9 | -- Portability : Portable 10 | -- 11 | -- A simple insertion sort. Though it's O(n^2), its iterative nature can be 12 | -- beneficial for small arrays. It is used to sort small segments of an array 13 | -- by some of the more heavy-duty, recursive algorithms. 14 | 15 | module Data.Vector.Algorithms.Insertion 16 | ( sort 17 | , sortUniq 18 | , sortBy 19 | , sortUniqBy 20 | , sortByBounds 21 | , sortByBounds' 22 | , Comparison 23 | ) where 24 | 25 | 26 | import Prelude hiding (read, length) 27 | 28 | import Control.Monad.Primitive 29 | 30 | import Data.Vector.Generic.Mutable 31 | 32 | import Data.Vector.Algorithms.Common (Comparison, uniqueMutableBy) 33 | 34 | import qualified Data.Vector.Algorithms.Optimal as O 35 | 36 | -- | Sorts an entire array using the default comparison for the type 37 | sort :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> m () 38 | sort = sortBy compare 39 | {-# INLINE sort #-} 40 | 41 | -- | A variant on `sort` that returns a vector of unique elements. 42 | sortUniq :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> m (v (PrimState m) e) 43 | sortUniq = sortUniqBy compare 44 | {-# INLINE sortUniq #-} 45 | 46 | -- | Sorts an entire array using a given comparison 47 | sortBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> m () 48 | sortBy cmp a = sortByBounds cmp a 0 (length a) 49 | {-# INLINE sortBy #-} 50 | 51 | -- | A variant on `sortBy` which returns a vector of unique elements. 52 | sortUniqBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> m (v (PrimState m) e) 53 | sortUniqBy cmp a = do 54 | sortByBounds cmp a 0 (length a) 55 | uniqueMutableBy cmp a 56 | {-# INLINE sortUniqBy #-} 57 | 58 | -- | Sorts the portion of an array delimited by [l,u) 59 | sortByBounds :: (PrimMonad m, MVector v e) 60 | => Comparison e -> v (PrimState m) e -> Int -> Int -> m () 61 | sortByBounds cmp a l u 62 | | len < 2 = return () 63 | | len == 2 = O.sort2ByOffset cmp a l 64 | | len == 3 = O.sort3ByOffset cmp a l 65 | | len == 4 = O.sort4ByOffset cmp a l 66 | | otherwise = O.sort4ByOffset cmp a l >> sortByBounds' cmp a l (l + 4) u 67 | where 68 | len = u - l 69 | {-# INLINE sortByBounds #-} 70 | 71 | -- | Sorts the portion of the array delimited by [l,u) under the assumption 72 | -- that [l,m) is already sorted. 73 | sortByBounds' :: (PrimMonad m, MVector v e) 74 | => Comparison e -> v (PrimState m) e -> Int -> Int -> Int -> m () 75 | sortByBounds' cmp a l m u = sort m 76 | where 77 | sort i 78 | | i < u = do v <- unsafeRead a i 79 | insert cmp a l v i 80 | sort (i+1) 81 | | otherwise = return () 82 | {-# INLINE sortByBounds' #-} 83 | 84 | -- Given a sorted array in [l,u), inserts val into its proper position, 85 | -- yielding a sorted [l,u] 86 | insert :: (PrimMonad m, MVector v e) 87 | => Comparison e -> v (PrimState m) e -> Int -> e -> Int -> m () 88 | insert cmp a l = loop 89 | where 90 | loop val j 91 | | j <= l = unsafeWrite a l val 92 | | otherwise = do e <- unsafeRead a (j - 1) 93 | case cmp val e of 94 | LT -> unsafeWrite a j e >> loop val (j - 1) 95 | _ -> unsafeWrite a j val 96 | {-# INLINE insert #-} 97 | -------------------------------------------------------------------------------- /src/Data/Vector/Algorithms/Intro.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE TypeFamilies #-} 3 | {-# LANGUAGE TypeOperators #-} 4 | {-# LANGUAGE ScopedTypeVariables #-} 5 | 6 | -- --------------------------------------------------------------------------- 7 | -- | 8 | -- Module : Data.Vector.Algorithms.Intro 9 | -- Copyright : (c) 2008-2015 Dan Doel 10 | -- Maintainer : Dan Doel 11 | -- Stability : Experimental 12 | -- Portability : Non-portable (type operators, bang patterns) 13 | -- 14 | -- This module implements various algorithms based on the introsort algorithm, 15 | -- originally described by David R. Musser in the paper /Introspective Sorting 16 | -- and Selection Algorithms/. It is also in widespread practical use, as the 17 | -- standard unstable sort used in the C++ Standard Template Library. 18 | -- 19 | -- Introsort is at its core a quicksort. The version implemented here has the 20 | -- following optimizations that make it perform better in practice: 21 | -- 22 | -- * Small segments of the array are left unsorted until a final insertion 23 | -- sort pass. This is faster than recursing all the way down to 24 | -- one-element arrays. 25 | -- 26 | -- * The pivot for segment [l,u) is chosen as the median of the elements at 27 | -- l, u-1 and (u+l)/2. This yields good behavior on mostly sorted (or 28 | -- reverse-sorted) arrays. 29 | -- 30 | -- * The algorithm tracks its recursion depth, and if it decides it is 31 | -- taking too long (depth greater than 2 * lg n), it switches to a heap 32 | -- sort to maintain O(n lg n) worst case behavior. (This is what makes the 33 | -- algorithm introsort). 34 | 35 | module Data.Vector.Algorithms.Intro 36 | ( -- * Sorting 37 | sort 38 | , sortUniq 39 | , sortBy 40 | , sortUniqBy 41 | , sortByBounds 42 | -- * Selecting 43 | , select 44 | , selectBy 45 | , selectByBounds 46 | -- * Partial sorting 47 | , partialSort 48 | , partialSortBy 49 | , partialSortByBounds 50 | , Comparison 51 | ) where 52 | 53 | import Prelude hiding (read, length) 54 | 55 | import Control.Monad 56 | import Control.Monad.Primitive 57 | 58 | import Data.Bits 59 | import Data.Vector.Generic.Mutable 60 | 61 | import Data.Vector.Algorithms.Common (Comparison, midPoint, uniqueMutableBy) 62 | 63 | import qualified Data.Vector.Algorithms.Insertion as I 64 | import qualified Data.Vector.Algorithms.Optimal as O 65 | import qualified Data.Vector.Algorithms.Heap as H 66 | 67 | -- | Sorts an entire array using the default ordering. 68 | sort :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> m () 69 | sort = sortBy compare 70 | {-# INLINE sort #-} 71 | 72 | -- | A variant on `sort` that returns a vector of unique elements. 73 | sortUniq :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> m (v (PrimState m) e) 74 | sortUniq = sortUniqBy compare 75 | {-# INLINE sortUniq #-} 76 | 77 | -- | A variant on `sortBy` which returns a vector of unique elements. 78 | sortBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> m () 79 | sortBy cmp a = sortByBounds cmp a 0 (length a) 80 | {-# INLINE sortBy #-} 81 | 82 | -- | Sorts an entire array using a custom ordering returning a vector of 83 | -- the unique elements. 84 | sortUniqBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> m (v (PrimState m) e) 85 | sortUniqBy cmp a = do 86 | sortByBounds cmp a 0 (length a) 87 | uniqueMutableBy cmp a 88 | {-# INLINE sortUniqBy #-} 89 | 90 | -- | Sorts a portion of an array [l,u) using a custom ordering 91 | sortByBounds 92 | :: (PrimMonad m, MVector v e) 93 | => Comparison e 94 | -> v (PrimState m) e 95 | -> Int -- ^ lower index, l 96 | -> Int -- ^ upper index, u 97 | -> m () 98 | sortByBounds cmp a l u 99 | | len < 2 = return () 100 | | len == 2 = O.sort2ByOffset cmp a l 101 | | len == 3 = O.sort3ByOffset cmp a l 102 | | len == 4 = O.sort4ByOffset cmp a l 103 | | otherwise = introsort cmp a (ilg len) l u 104 | where len = u - l 105 | {-# INLINE sortByBounds #-} 106 | 107 | -- Internal version of the introsort loop which allows partial 108 | -- sort functions to call with a specified bound on iterations. 109 | introsort :: (PrimMonad m, MVector v e) 110 | => Comparison e -> v (PrimState m) e -> Int -> Int -> Int -> m () 111 | introsort cmp a i l u = sort i l u >> I.sortByBounds cmp a l u 112 | where 113 | sort 0 l u = H.sortByBounds cmp a l u 114 | sort d l u 115 | | len < threshold = return () 116 | | otherwise = do O.sort3ByIndex cmp a c l (u-1) -- sort the median into the lowest position 117 | p <- unsafeRead a l 118 | mid <- partitionBy cmp a p (l+1) u 119 | unsafeSwap a l (mid - 1) 120 | sort (d-1) mid u 121 | sort (d-1) l (mid - 1) 122 | where 123 | len = u - l 124 | c = midPoint u l 125 | {-# INLINE introsort #-} 126 | 127 | -- | Moves the least k elements to the front of the array in 128 | -- no particular order. 129 | select 130 | :: (PrimMonad m, MVector v e, Ord e) 131 | => v (PrimState m) e 132 | -> Int -- ^ number of elements to select, k 133 | -> m () 134 | select = selectBy compare 135 | {-# INLINE select #-} 136 | 137 | -- | Moves the least k elements (as defined by the comparison) to 138 | -- the front of the array in no particular order. 139 | selectBy 140 | :: (PrimMonad m, MVector v e) 141 | => Comparison e 142 | -> v (PrimState m) e 143 | -> Int -- ^ number of elements to select, k 144 | -> m () 145 | selectBy cmp a k = selectByBounds cmp a k 0 (length a) 146 | {-# INLINE selectBy #-} 147 | 148 | -- | Moves the least k elements in the interval [l,u) to the positions 149 | -- [l,k+l) in no particular order. 150 | selectByBounds 151 | :: (PrimMonad m, MVector v e) 152 | => Comparison e 153 | -> v (PrimState m) e 154 | -> Int -- ^ number of elements to select, k 155 | -> Int -- ^ lower bound, l 156 | -> Int -- ^ upper bound, u 157 | -> m () 158 | selectByBounds cmp a k l u 159 | | l >= u = return () 160 | | otherwise = go (ilg len) l (l + k) u 161 | where 162 | len = u - l 163 | go 0 l m u = H.selectByBounds cmp a (m - l) l u 164 | go n l m u = do O.sort3ByIndex cmp a c l (u-1) 165 | p <- unsafeRead a l 166 | mid <- partitionBy cmp a p (l+1) u 167 | unsafeSwap a l (mid - 1) 168 | if m > mid 169 | then go (n-1) mid m u 170 | else if m < mid - 1 171 | then go (n-1) l m (mid - 1) 172 | else return () 173 | where c = midPoint u l 174 | {-# INLINE selectByBounds #-} 175 | 176 | -- | Moves the least k elements to the front of the array, sorted. 177 | partialSort 178 | :: (PrimMonad m, MVector v e, Ord e) 179 | => v (PrimState m) e 180 | -> Int -- ^ number of elements to sort, k 181 | -> m () 182 | partialSort = partialSortBy compare 183 | {-# INLINE partialSort #-} 184 | 185 | -- | Moves the least k elements (as defined by the comparison) to 186 | -- the front of the array, sorted. 187 | partialSortBy 188 | :: (PrimMonad m, MVector v e) 189 | => Comparison e 190 | -> v (PrimState m) e 191 | -> Int -- ^ number of elements to sort, k 192 | -> m () 193 | partialSortBy cmp a k = partialSortByBounds cmp a k 0 (length a) 194 | {-# INLINE partialSortBy #-} 195 | 196 | -- | Moves the least k elements in the interval [l,u) to the positions 197 | -- [l,k+l), sorted. 198 | partialSortByBounds 199 | :: (PrimMonad m, MVector v e) 200 | => Comparison e 201 | -> v (PrimState m) e 202 | -> Int -- ^ number of elements to sort, k 203 | -> Int -- ^ lower index, l 204 | -> Int -- ^ upper index, u 205 | -> m () 206 | partialSortByBounds cmp a k l u 207 | | l >= u = return () 208 | | otherwise = let k' = min (u-l) k 209 | -- N.B. Clamp k to the length of the range 210 | -- being sorted. 211 | in go (ilg len) l (l + k') u 212 | where 213 | isort = introsort cmp a 214 | {-# INLINE [1] isort #-} 215 | len = u - l 216 | go 0 l m n = H.partialSortByBounds cmp a (m - l) l u 217 | go n l m u 218 | | l == m = return () 219 | | otherwise = do O.sort3ByIndex cmp a c l (u-1) 220 | p <- unsafeRead a l 221 | mid <- partitionBy cmp a p (l+1) u 222 | unsafeSwap a l (mid - 1) 223 | case compare m mid of 224 | GT -> do isort (n-1) l (mid - 1) 225 | go (n-1) mid m u 226 | EQ -> isort (n-1) l m 227 | LT -> go n l m (mid - 1) 228 | where c = midPoint u l 229 | {-# INLINE partialSortByBounds #-} 230 | 231 | partitionBy :: forall m v e. (PrimMonad m, MVector v e) 232 | => Comparison e -> v (PrimState m) e -> e -> Int -> Int -> m Int 233 | partitionBy cmp a = partUp 234 | where 235 | partUp :: e -> Int -> Int -> m Int 236 | partUp p l u 237 | | l < u = do e <- unsafeRead a l 238 | case cmp e p of 239 | LT -> partUp p (l+1) u 240 | _ -> partDown p l (u-1) 241 | | otherwise = return l 242 | 243 | partDown :: e -> Int -> Int -> m Int 244 | partDown p l u 245 | | l < u = do e <- unsafeRead a u 246 | case cmp p e of 247 | LT -> partDown p l (u-1) 248 | _ -> unsafeSwap a l u >> partUp p (l+1) u 249 | | otherwise = return l 250 | {-# INLINE partitionBy #-} 251 | 252 | -- computes the number of recursive calls after which heapsort should 253 | -- be invoked given the lower and upper indices of the array to be sorted 254 | ilg :: Int -> Int 255 | ilg m = 2 * loop m 0 256 | where 257 | loop 0 !k = k - 1 258 | loop n !k = loop (n `shiftR` 1) (k+1) 259 | 260 | -- the size of array at which the introsort algorithm switches to insertion sort 261 | threshold :: Int 262 | threshold = 18 263 | {-# INLINE threshold #-} 264 | -------------------------------------------------------------------------------- /src/Data/Vector/Algorithms/Merge.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE TypeFamilies #-} 3 | 4 | -- --------------------------------------------------------------------------- 5 | -- | 6 | -- Module : Data.Vector.Algorithms.Merge 7 | -- Copyright : (c) 2008-2011 Dan Doel 8 | -- Maintainer : Dan Doel 9 | -- Stability : Experimental 10 | -- Portability : Portable 11 | -- 12 | -- This module implements a simple top-down merge sort. The temporary buffer 13 | -- is preallocated to 1/2 the size of the input array, and shared through 14 | -- the entire sorting process to ease the amount of allocation performed in 15 | -- total. This is a stable sort. 16 | 17 | module Data.Vector.Algorithms.Merge 18 | ( sort 19 | , sortUniq 20 | , sortBy 21 | , sortUniqBy 22 | , Comparison 23 | ) where 24 | 25 | import Prelude hiding (read, length) 26 | 27 | import Control.Monad.Primitive 28 | 29 | import Data.Bits 30 | import Data.Vector.Generic.Mutable 31 | 32 | import Data.Vector.Algorithms.Common (Comparison, copyOffset, midPoint, uniqueMutableBy) 33 | 34 | import qualified Data.Vector.Algorithms.Optimal as O 35 | import qualified Data.Vector.Algorithms.Insertion as I 36 | 37 | -- | Sorts an array using the default comparison. 38 | sort :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> m () 39 | sort = sortBy compare 40 | {-# INLINE sort #-} 41 | 42 | -- | A variant on `sort` that returns a vector of unique elements. 43 | sortUniq :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> m (v (PrimState m) e) 44 | sortUniq = sortUniqBy compare 45 | {-# INLINE sortUniq #-} 46 | 47 | -- | Sorts an array using a custom comparison. 48 | sortBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> m () 49 | sortBy cmp vec = if len <= 4 50 | then if len <= 2 51 | then if len /= 2 52 | then return () 53 | else O.sort2ByOffset cmp vec 0 54 | else if len == 3 55 | then O.sort3ByOffset cmp vec 0 56 | else O.sort4ByOffset cmp vec 0 57 | else if len < threshold 58 | then I.sortByBounds cmp vec 0 len 59 | else do buf <- new halfLen 60 | mergeSortWithBuf cmp vec buf 61 | where 62 | len = length vec 63 | -- odd lengths have a larger half that needs to fit, so use ceiling, not floor 64 | halfLen = (len + 1) `div` 2 65 | {-# INLINE sortBy #-} 66 | 67 | -- | A variant on `sortBy` which returns a vector of unique elements. 68 | sortUniqBy :: (PrimMonad m, MVector v e) => Comparison e -> v (PrimState m) e -> m (v (PrimState m) e) 69 | sortUniqBy cmp vec = do 70 | sortBy cmp vec 71 | uniqueMutableBy cmp vec 72 | {-# INLINE sortUniqBy #-} 73 | 74 | mergeSortWithBuf :: (PrimMonad m, MVector v e) 75 | => Comparison e -> v (PrimState m) e -> v (PrimState m) e -> m () 76 | mergeSortWithBuf cmp src buf = loop 0 (length src) 77 | where 78 | loop l u 79 | | len < threshold = I.sortByBounds cmp src l u 80 | | otherwise = do loop l mid 81 | loop mid u 82 | merge cmp (unsafeSlice l len src) buf (mid - l) 83 | where len = u - l 84 | mid = midPoint u l 85 | {-# INLINE mergeSortWithBuf #-} 86 | 87 | merge :: (PrimMonad m, MVector v e) 88 | => Comparison e -> v (PrimState m) e -> v (PrimState m) e 89 | -> Int -> m () 90 | merge cmp src buf mid = do unsafeCopy tmp lower 91 | eTmp <- unsafeRead tmp 0 92 | eUpp <- unsafeRead upper 0 93 | loop tmp 0 eTmp upper 0 eUpp 0 94 | where 95 | lower = unsafeSlice 0 mid src 96 | upper = unsafeSlice mid (length src - mid) src 97 | tmp = unsafeSlice 0 mid buf 98 | 99 | wroteHigh low iLow eLow high iHigh iIns 100 | | iHigh >= length high = unsafeCopy (unsafeSlice iIns (length low - iLow) src) 101 | (unsafeSlice iLow (length low - iLow) low) 102 | | otherwise = do eHigh <- unsafeRead high iHigh 103 | loop low iLow eLow high iHigh eHigh iIns 104 | 105 | wroteLow low iLow high iHigh eHigh iIns 106 | | iLow >= length low = return () 107 | | otherwise = do eLow <- unsafeRead low iLow 108 | loop low iLow eLow high iHigh eHigh iIns 109 | 110 | loop !low !iLow !eLow !high !iHigh !eHigh !iIns = case cmp eHigh eLow of 111 | LT -> do unsafeWrite src iIns eHigh 112 | wroteHigh low iLow eLow high (iHigh + 1) (iIns + 1) 113 | _ -> do unsafeWrite src iIns eLow 114 | wroteLow low (iLow + 1) high iHigh eHigh (iIns + 1) 115 | {-# INLINE merge #-} 116 | 117 | threshold :: Int 118 | threshold = 25 119 | {-# INLINE threshold #-} 120 | -------------------------------------------------------------------------------- /src/Data/Vector/Algorithms/Optimal.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE CPP #-} 2 | 3 | -- --------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Data.Vector.Algorithms.Optimal 6 | -- Copyright : (c) 2008-2010 Dan Doel 7 | -- Maintainer : Dan Doel 8 | -- Stability : Experimental 9 | -- Portability : Portable 10 | -- 11 | -- Optimal sorts for very small array sizes, or for small numbers of 12 | -- particular indices in a larger array (to be used, for instance, for 13 | -- sorting a median of 3 values into the lowest position in an array 14 | -- for a median-of-3 quicksort). 15 | 16 | -- The code herein was adapted from a C algorithm for optimal sorts 17 | -- of small arrays. The original code was produced for the article 18 | -- /Sorting Revisited/ by Paul Hsieh, available here: 19 | -- 20 | -- http://www.azillionmonkeys.com/qed/sort.html 21 | -- 22 | -- The LICENSE file contains the relevant copyright information for 23 | -- the reference C code. 24 | 25 | module Data.Vector.Algorithms.Optimal 26 | ( sort2ByIndex 27 | , sort2ByOffset 28 | , sort3ByIndex 29 | , sort3ByOffset 30 | , sort4ByIndex 31 | , sort4ByOffset 32 | , Comparison 33 | ) where 34 | 35 | import Prelude hiding (read, length) 36 | 37 | import Control.Monad.Primitive 38 | 39 | import Data.Vector.Generic.Mutable 40 | 41 | import Data.Vector.Algorithms.Common (Comparison) 42 | 43 | #if MIN_VERSION_vector(0,13,0) 44 | import qualified Data.Vector.Internal.Check as Ck 45 | # define CHECK_INDEX(name, i, n) Ck.checkIndex Ck.Unsafe (i) (n) 46 | #else 47 | # define CHECK_INDEX(name, i, n) UNSAFE_CHECK(checkIndex) name (i) (n) 48 | #endif 49 | 50 | #include "vector.h" 51 | 52 | -- | Sorts the elements at the positions 'off' and 'off + 1' in the given 53 | -- array using the comparison. 54 | sort2ByOffset :: (PrimMonad m, MVector v e) 55 | => Comparison e -> v (PrimState m) e -> Int -> m () 56 | sort2ByOffset cmp a off = sort2ByIndex cmp a off (off + 1) 57 | {-# INLINABLE sort2ByOffset #-} 58 | 59 | -- | Sorts the elements at the two given indices using the comparison. This 60 | -- is essentially a compare-and-swap, although the first index is assumed to 61 | -- be the 'lower' of the two. 62 | sort2ByIndex :: (PrimMonad m, MVector v e) 63 | => Comparison e -> v (PrimState m) e -> Int -> Int -> m () 64 | sort2ByIndex cmp a i j = CHECK_INDEX("sort2ByIndex", i, length a) 65 | $ CHECK_INDEX("sort2ByIndex", j, length a) $ do 66 | a0 <- unsafeRead a i 67 | a1 <- unsafeRead a j 68 | case cmp a0 a1 of 69 | GT -> unsafeWrite a i a1 >> unsafeWrite a j a0 70 | _ -> return () 71 | {-# INLINABLE sort2ByIndex #-} 72 | 73 | -- | Sorts the three elements starting at the given offset in the array. 74 | sort3ByOffset :: (PrimMonad m, MVector v e) 75 | => Comparison e -> v (PrimState m) e -> Int -> m () 76 | sort3ByOffset cmp a off = sort3ByIndex cmp a off (off + 1) (off + 2) 77 | {-# INLINABLE sort3ByOffset #-} 78 | 79 | -- | Sorts the elements at the three given indices. The indices are assumed 80 | -- to be given from lowest to highest, so if 'l < m < u' then 81 | -- 'sort3ByIndex cmp a m l u' essentially sorts the median of three into the 82 | -- lowest position in the array. 83 | sort3ByIndex :: (PrimMonad m, MVector v e) 84 | => Comparison e -> v (PrimState m) e -> Int -> Int -> Int -> m () 85 | sort3ByIndex cmp a i j k = CHECK_INDEX("sort3ByIndex", i, length a) 86 | $ CHECK_INDEX("sort3ByIndex", j, length a) 87 | $ CHECK_INDEX("sort3ByIndex", k, length a) $ do 88 | a0 <- unsafeRead a i 89 | a1 <- unsafeRead a j 90 | a2 <- unsafeRead a k 91 | case cmp a0 a1 of 92 | GT -> case cmp a0 a2 of 93 | GT -> case cmp a2 a1 of 94 | LT -> do unsafeWrite a i a2 95 | unsafeWrite a k a0 96 | _ -> do unsafeWrite a i a1 97 | unsafeWrite a j a2 98 | unsafeWrite a k a0 99 | _ -> do unsafeWrite a i a1 100 | unsafeWrite a j a0 101 | _ -> case cmp a1 a2 of 102 | GT -> case cmp a0 a2 of 103 | GT -> do unsafeWrite a i a2 104 | unsafeWrite a j a0 105 | unsafeWrite a k a1 106 | _ -> do unsafeWrite a j a2 107 | unsafeWrite a k a1 108 | _ -> return () 109 | {-# INLINABLE sort3ByIndex #-} 110 | 111 | -- | Sorts the four elements beginning at the offset. 112 | sort4ByOffset :: (PrimMonad m, MVector v e) 113 | => Comparison e -> v (PrimState m) e -> Int -> m () 114 | sort4ByOffset cmp a off = sort4ByIndex cmp a off (off + 1) (off + 2) (off + 3) 115 | {-# INLINABLE sort4ByOffset #-} 116 | 117 | -- The horror... 118 | 119 | -- | Sorts the elements at the four given indices. Like the 2 and 3 element 120 | -- versions, this assumes that the indices are given in increasing order, so 121 | -- it can be used to sort medians into particular positions and so on. 122 | sort4ByIndex :: (PrimMonad m, MVector v e) 123 | => Comparison e -> v (PrimState m) e -> Int -> Int -> Int -> Int -> m () 124 | sort4ByIndex cmp a i j k l = CHECK_INDEX("sort4ByIndex", i, length a) 125 | $ CHECK_INDEX("sort4ByIndex", j, length a) 126 | $ CHECK_INDEX("sort4ByIndex", k, length a) 127 | $ CHECK_INDEX("sort4ByIndex", l, length a) $ do 128 | a0 <- unsafeRead a i 129 | a1 <- unsafeRead a j 130 | a2 <- unsafeRead a k 131 | a3 <- unsafeRead a l 132 | case cmp a0 a1 of 133 | GT -> case cmp a0 a2 of 134 | GT -> case cmp a1 a2 of 135 | GT -> case cmp a1 a3 of 136 | GT -> case cmp a2 a3 of 137 | GT -> do unsafeWrite a i a3 138 | unsafeWrite a j a2 139 | unsafeWrite a k a1 140 | unsafeWrite a l a0 141 | _ -> do unsafeWrite a i a2 142 | unsafeWrite a j a3 143 | unsafeWrite a k a1 144 | unsafeWrite a l a0 145 | _ -> case cmp a0 a3 of 146 | GT -> do unsafeWrite a i a2 147 | unsafeWrite a j a1 148 | unsafeWrite a k a3 149 | unsafeWrite a l a0 150 | _ -> do unsafeWrite a i a2 151 | unsafeWrite a j a1 152 | unsafeWrite a k a0 153 | unsafeWrite a l a3 154 | _ -> case cmp a2 a3 of 155 | GT -> case cmp a1 a3 of 156 | GT -> do unsafeWrite a i a3 157 | unsafeWrite a j a1 158 | unsafeWrite a k a2 159 | unsafeWrite a l a0 160 | _ -> do unsafeWrite a i a1 161 | unsafeWrite a j a3 162 | unsafeWrite a k a2 163 | unsafeWrite a l a0 164 | _ -> case cmp a0 a3 of 165 | GT -> do unsafeWrite a i a1 166 | unsafeWrite a j a2 167 | unsafeWrite a k a3 168 | unsafeWrite a l a0 169 | _ -> do unsafeWrite a i a1 170 | unsafeWrite a j a2 171 | unsafeWrite a k a0 172 | -- unsafeWrite a l a3 173 | _ -> case cmp a0 a3 of 174 | GT -> case cmp a1 a3 of 175 | GT -> do unsafeWrite a i a3 176 | -- unsafeWrite a j a1 177 | unsafeWrite a k a0 178 | unsafeWrite a l a2 179 | _ -> do unsafeWrite a i a1 180 | unsafeWrite a j a3 181 | unsafeWrite a k a0 182 | unsafeWrite a l a2 183 | _ -> case cmp a2 a3 of 184 | GT -> do unsafeWrite a i a1 185 | unsafeWrite a j a0 186 | unsafeWrite a k a3 187 | unsafeWrite a l a2 188 | _ -> do unsafeWrite a i a1 189 | unsafeWrite a j a0 190 | -- unsafeWrite a k a2 191 | -- unsafeWrite a l a3 192 | _ -> case cmp a1 a2 of 193 | GT -> case cmp a0 a2 of 194 | GT -> case cmp a0 a3 of 195 | GT -> case cmp a2 a3 of 196 | GT -> do unsafeWrite a i a3 197 | unsafeWrite a j a2 198 | unsafeWrite a k a0 199 | unsafeWrite a l a1 200 | _ -> do unsafeWrite a i a2 201 | unsafeWrite a j a3 202 | unsafeWrite a k a0 203 | unsafeWrite a l a1 204 | _ -> case cmp a1 a3 of 205 | GT -> do unsafeWrite a i a2 206 | unsafeWrite a j a0 207 | unsafeWrite a k a3 208 | unsafeWrite a l a1 209 | _ -> do unsafeWrite a i a2 210 | unsafeWrite a j a0 211 | unsafeWrite a k a1 212 | -- unsafeWrite a l a3 213 | _ -> case cmp a2 a3 of 214 | GT -> case cmp a0 a3 of 215 | GT -> do unsafeWrite a i a3 216 | unsafeWrite a j a0 217 | -- unsafeWrite a k a2 218 | unsafeWrite a l a1 219 | _ -> do -- unsafeWrite a i a0 220 | unsafeWrite a j a3 221 | -- unsafeWrite a k a2 222 | unsafeWrite a l a1 223 | _ -> case cmp a1 a3 of 224 | GT -> do -- unsafeWrite a i a0 225 | unsafeWrite a j a2 226 | unsafeWrite a k a3 227 | unsafeWrite a l a1 228 | _ -> do -- unsafeWrite a i a0 229 | unsafeWrite a j a2 230 | unsafeWrite a k a1 231 | -- unsafeWrite a l a3 232 | _ -> case cmp a1 a3 of 233 | GT -> case cmp a0 a3 of 234 | GT -> do unsafeWrite a i a3 235 | unsafeWrite a j a0 236 | unsafeWrite a k a1 237 | unsafeWrite a l a2 238 | _ -> do -- unsafeWrite a i a0 239 | unsafeWrite a j a3 240 | unsafeWrite a k a1 241 | unsafeWrite a l a2 242 | _ -> case cmp a2 a3 of 243 | GT -> do -- unsafeWrite a i a0 244 | -- unsafeWrite a j a1 245 | unsafeWrite a k a3 246 | unsafeWrite a l a2 247 | _ -> do -- unsafeWrite a i a0 248 | -- unsafeWrite a j a1 249 | -- unsafeWrite a k a2 250 | -- unsafeWrite a l a3 251 | return () 252 | {-# INLINABLE sort4ByIndex #-} 253 | -------------------------------------------------------------------------------- /src/Data/Vector/Algorithms/Radix.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE TypeFamilies #-} 3 | {-# LANGUAGE TypeOperators #-} 4 | {-# LANGUAGE ScopedTypeVariables #-} 5 | 6 | -- --------------------------------------------------------------------------- 7 | -- | 8 | -- Module : Data.Vector.Algorithms.Radix 9 | -- Copyright : (c) 2008-2011 Dan Doel 10 | -- Maintainer : Dan Doel 11 | -- Stability : Experimental 12 | -- Portability : Non-portable (scoped type variables, bang patterns) 13 | -- 14 | -- This module provides a radix sort for a subclass of unboxed arrays. The 15 | -- radix class gives information on 16 | -- * the number of passes needed for the data type 17 | -- 18 | -- * the size of the auxiliary arrays 19 | -- 20 | -- * how to compute the pass-k radix of a value 21 | -- 22 | -- Radix sort is not a comparison sort, so it is able to achieve O(n) run 23 | -- time, though it also uses O(n) auxiliary space. In addition, there is a 24 | -- constant space overhead of 2*size*sizeOf(Int) for the sort, so it is not 25 | -- advisable to use this sort for large numbers of very small arrays. 26 | -- 27 | -- A standard example (upon which one could base their own Radix instance) 28 | -- is Word32: 29 | -- 30 | -- * We choose to sort on r = 8 bits at a time 31 | -- 32 | -- * A Word32 has b = 32 bits total 33 | -- 34 | -- Thus, b/r = 4 passes are required, 2^r = 256 elements are needed in an 35 | -- auxiliary array, and the radix function is: 36 | -- 37 | -- > radix k e = (e `shiftR` (k*8)) .&. 255 38 | 39 | module Data.Vector.Algorithms.Radix (sort, sortBy, Radix(..)) where 40 | 41 | import Prelude hiding (read, length) 42 | 43 | import Control.Monad 44 | import Control.Monad.Primitive 45 | 46 | import qualified Data.Vector.Primitive.Mutable as PV 47 | import Data.Vector.Generic.Mutable 48 | 49 | import Data.Vector.Algorithms.Common 50 | 51 | import Data.Bits 52 | import Data.Int 53 | import Data.Word 54 | 55 | 56 | import Foreign.Storable 57 | 58 | class Radix e where 59 | -- | The number of passes necessary to sort an array of es 60 | passes :: e -> Int 61 | -- | The size of an auxiliary array 62 | size :: e -> Int 63 | -- | The radix function parameterized by the current pass 64 | radix :: Int -> e -> Int 65 | 66 | instance Radix Int where 67 | passes _ = sizeOf (undefined :: Int) 68 | {-# INLINE passes #-} 69 | size _ = 256 70 | {-# INLINE size #-} 71 | radix 0 e = e .&. 255 72 | radix i e 73 | | i == passes e - 1 = radix' (e `xor` minBound) 74 | | otherwise = radix' e 75 | where radix' e = (e `shiftR` (i `shiftL` 3)) .&. 255 76 | {-# INLINE radix #-} 77 | 78 | instance Radix Int8 where 79 | passes _ = 1 80 | {-# INLINE passes #-} 81 | size _ = 256 82 | {-# INLINE size #-} 83 | radix _ e = 255 .&. fromIntegral e `xor` 128 84 | {-# INLINE radix #-} 85 | 86 | instance Radix Int16 where 87 | passes _ = 2 88 | {-# INLINE passes #-} 89 | size _ = 256 90 | {-# INLINE size #-} 91 | radix 0 e = fromIntegral (e .&. 255) 92 | radix 1 e = fromIntegral (((e `xor` minBound) `shiftR` 8) .&. 255) 93 | {-# INLINE radix #-} 94 | 95 | instance Radix Int32 where 96 | passes _ = 4 97 | {-# INLINE passes #-} 98 | size _ = 256 99 | {-# INLINE size #-} 100 | radix 0 e = fromIntegral (e .&. 255) 101 | radix 1 e = fromIntegral ((e `shiftR` 8) .&. 255) 102 | radix 2 e = fromIntegral ((e `shiftR` 16) .&. 255) 103 | radix 3 e = fromIntegral (((e `xor` minBound) `shiftR` 24) .&. 255) 104 | {-# INLINE radix #-} 105 | 106 | instance Radix Int64 where 107 | passes _ = 8 108 | {-# INLINE passes #-} 109 | size _ = 256 110 | {-# INLINE size #-} 111 | radix 0 e = fromIntegral (e .&. 255) 112 | radix 1 e = fromIntegral ((e `shiftR` 8) .&. 255) 113 | radix 2 e = fromIntegral ((e `shiftR` 16) .&. 255) 114 | radix 3 e = fromIntegral ((e `shiftR` 24) .&. 255) 115 | radix 4 e = fromIntegral ((e `shiftR` 32) .&. 255) 116 | radix 5 e = fromIntegral ((e `shiftR` 40) .&. 255) 117 | radix 6 e = fromIntegral ((e `shiftR` 48) .&. 255) 118 | radix 7 e = fromIntegral (((e `xor` minBound) `shiftR` 56) .&. 255) 119 | {-# INLINE radix #-} 120 | 121 | instance Radix Word where 122 | passes _ = sizeOf (undefined :: Word) 123 | {-# INLINE passes #-} 124 | size _ = 256 125 | {-# INLINE size #-} 126 | radix 0 e = fromIntegral (e .&. 255) 127 | radix i e = fromIntegral ((e `shiftR` (i `shiftL` 3)) .&. 255) 128 | {-# INLINE radix #-} 129 | 130 | instance Radix Word8 where 131 | passes _ = 1 132 | {-# INLINE passes #-} 133 | size _ = 256 134 | {-# INLINE size #-} 135 | radix _ = fromIntegral 136 | {-# INLINE radix #-} 137 | 138 | instance Radix Word16 where 139 | passes _ = 2 140 | {-# INLINE passes #-} 141 | size _ = 256 142 | {-# INLINE size #-} 143 | radix 0 e = fromIntegral (e .&. 255) 144 | radix 1 e = fromIntegral ((e `shiftR` 8) .&. 255) 145 | {-# INLINE radix #-} 146 | 147 | instance Radix Word32 where 148 | passes _ = 4 149 | {-# INLINE passes #-} 150 | size _ = 256 151 | {-# INLINE size #-} 152 | radix 0 e = fromIntegral (e .&. 255) 153 | radix 1 e = fromIntegral ((e `shiftR` 8) .&. 255) 154 | radix 2 e = fromIntegral ((e `shiftR` 16) .&. 255) 155 | radix 3 e = fromIntegral ((e `shiftR` 24) .&. 255) 156 | {-# INLINE radix #-} 157 | 158 | instance Radix Word64 where 159 | passes _ = 8 160 | {-# INLINE passes #-} 161 | size _ = 256 162 | {-# INLINE size #-} 163 | radix 0 e = fromIntegral (e .&. 255) 164 | radix 1 e = fromIntegral ((e `shiftR` 8) .&. 255) 165 | radix 2 e = fromIntegral ((e `shiftR` 16) .&. 255) 166 | radix 3 e = fromIntegral ((e `shiftR` 24) .&. 255) 167 | radix 4 e = fromIntegral ((e `shiftR` 32) .&. 255) 168 | radix 5 e = fromIntegral ((e `shiftR` 40) .&. 255) 169 | radix 6 e = fromIntegral ((e `shiftR` 48) .&. 255) 170 | radix 7 e = fromIntegral ((e `shiftR` 56) .&. 255) 171 | {-# INLINE radix #-} 172 | 173 | instance (Radix i, Radix j) => Radix (i, j) where 174 | passes ~(i, j) = passes i + passes j 175 | {-# INLINE passes #-} 176 | size ~(i, j) = size i `max` size j 177 | {-# INLINE size #-} 178 | radix k ~(i, j) | k < passes j = radix k j 179 | | otherwise = radix (k - passes j) i 180 | {-# INLINE radix #-} 181 | 182 | -- | Sorts an array based on the Radix instance. 183 | sort :: forall e m v. (PrimMonad m, MVector v e, Radix e) 184 | => v (PrimState m) e -> m () 185 | sort arr = sortBy (passes e) (size e) radix arr 186 | where 187 | e :: e 188 | e = undefined 189 | {-# INLINE sort #-} 190 | 191 | -- | Radix sorts an array using custom radix information 192 | -- requires the number of passes to fully sort the array, 193 | -- the size of of auxiliary arrays necessary (should be 194 | -- one greater than the maximum value returned by the radix 195 | -- function), and a radix function, which takes the pass 196 | -- and an element, and returns the relevant radix. 197 | sortBy :: (PrimMonad m, MVector v e) 198 | => Int -- ^ the number of passes 199 | -> Int -- ^ the size of auxiliary arrays 200 | -> (Int -> e -> Int) -- ^ the radix function 201 | -> v (PrimState m) e -- ^ the array to be sorted 202 | -> m () 203 | sortBy passes size rdx arr = do 204 | tmp <- new (length arr) 205 | count <- new size 206 | radixLoop passes rdx arr tmp count 207 | {-# INLINE sortBy #-} 208 | 209 | radixLoop :: (PrimMonad m, MVector v e) 210 | => Int -- passes 211 | -> (Int -> e -> Int) -- radix function 212 | -> v (PrimState m) e -- array to sort 213 | -> v (PrimState m) e -- temporary array 214 | -> PV.MVector (PrimState m) Int -- radix count array 215 | -> m () 216 | radixLoop passes rdx src dst count = go False 0 217 | where 218 | len = length src 219 | go swap k 220 | | k < passes = if swap 221 | then body rdx dst src count k >> go (not swap) (k+1) 222 | else body rdx src dst count k >> go (not swap) (k+1) 223 | | otherwise = when swap (unsafeCopy src dst) 224 | {-# INLINE radixLoop #-} 225 | 226 | body :: (PrimMonad m, MVector v e) 227 | => (Int -> e -> Int) -- radix function 228 | -> v (PrimState m) e -- source array 229 | -> v (PrimState m) e -- destination array 230 | -> PV.MVector (PrimState m) Int -- radix count 231 | -> Int -- current pass 232 | -> m () 233 | body rdx src dst count k = do 234 | countLoop (rdx k) src count 235 | accumulate count 236 | moveLoop k rdx src dst count 237 | {-# INLINE body #-} 238 | 239 | accumulate :: (PrimMonad m) 240 | => PV.MVector (PrimState m) Int -> m () 241 | accumulate count = go 0 0 242 | where 243 | len = length count 244 | go i acc 245 | | i < len = do ci <- unsafeRead count i 246 | unsafeWrite count i acc 247 | go (i+1) (acc + ci) 248 | | otherwise = return () 249 | {-# INLINE accumulate #-} 250 | 251 | moveLoop :: (PrimMonad m, MVector v e) 252 | => Int -> (Int -> e -> Int) -> v (PrimState m) e 253 | -> v (PrimState m) e -> PV.MVector (PrimState m) Int -> m () 254 | moveLoop k rdx src dst prefix = go 0 255 | where 256 | len = length src 257 | go i 258 | | i < len = do srci <- unsafeRead src i 259 | pf <- inc prefix (rdx k srci) 260 | unsafeWrite dst pf srci 261 | go (i+1) 262 | | otherwise = return () 263 | {-# INLINE moveLoop #-} 264 | 265 | -------------------------------------------------------------------------------- /src/Data/Vector/Algorithms/Search.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | {-# LANGUAGE TypeFamilies #-} 3 | 4 | -- --------------------------------------------------------------------------- 5 | -- | 6 | -- Module : Data.Vector.Algorithms.Search 7 | -- Copyright : (c) 2009-2015 Dan Doel, 2015 Tim Baumann 8 | -- Maintainer : Dan Doel 9 | -- Stability : Experimental 10 | -- Portability : Non-portable (bang patterns) 11 | -- 12 | -- This module implements several methods of searching for indicies to insert 13 | -- elements into a sorted vector. 14 | 15 | module Data.Vector.Algorithms.Search 16 | ( binarySearch 17 | , binarySearchBy 18 | , binarySearchByBounds 19 | , binarySearchL 20 | , binarySearchLBy 21 | , binarySearchLByBounds 22 | , binarySearchR 23 | , binarySearchRBy 24 | , binarySearchRByBounds 25 | , binarySearchP 26 | , binarySearchPBounds 27 | , gallopingSearchLeftP 28 | , gallopingSearchLeftPBounds 29 | , gallopingSearchRightP 30 | , gallopingSearchRightPBounds 31 | , Comparison 32 | ) where 33 | 34 | import Prelude hiding (read, length) 35 | 36 | import Control.Monad.Primitive 37 | 38 | import Data.Bits 39 | 40 | import Data.Vector.Generic.Mutable 41 | 42 | import Data.Vector.Algorithms.Common (Comparison, midPoint) 43 | 44 | -- | Finds an index in a given sorted vector at which the given element could 45 | -- be inserted while maintaining the sortedness of the vector. 46 | binarySearch :: (PrimMonad m, MVector v e, Ord e) 47 | => v (PrimState m) e -> e -> m Int 48 | binarySearch = binarySearchBy compare 49 | {-# INLINE binarySearch #-} 50 | 51 | -- | Finds an index in a given vector, which must be sorted with respect to the 52 | -- given comparison function, at which the given element could be inserted while 53 | -- preserving the vector's sortedness. 54 | binarySearchBy :: (PrimMonad m, MVector v e) 55 | => Comparison e -> v (PrimState m) e -> e -> m Int 56 | binarySearchBy cmp vec e = binarySearchByBounds cmp vec e 0 (length vec) 57 | {-# INLINE binarySearchBy #-} 58 | 59 | -- | Given a vector sorted with respect to a given comparison function in indices 60 | -- in [l,u), finds an index in [l,u] at which the given element could be inserted 61 | -- while preserving sortedness. 62 | binarySearchByBounds :: (PrimMonad m, MVector v e) 63 | => Comparison e -> v (PrimState m) e -> e -> Int -> Int -> m Int 64 | binarySearchByBounds cmp vec e = loop 65 | where 66 | loop !l !u 67 | | u <= l = return l 68 | | otherwise = do e' <- unsafeRead vec k 69 | case cmp e' e of 70 | LT -> loop (k+1) u 71 | EQ -> return k 72 | GT -> loop l k 73 | where k = midPoint u l 74 | {-# INLINE binarySearchByBounds #-} 75 | 76 | -- | Finds the lowest index in a given sorted vector at which the given element 77 | -- could be inserted while maintaining the sortedness. 78 | binarySearchL :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> e -> m Int 79 | binarySearchL = binarySearchLBy compare 80 | {-# INLINE binarySearchL #-} 81 | 82 | -- | Finds the lowest index in a given vector, which must be sorted with respect to 83 | -- the given comparison function, at which the given element could be inserted 84 | -- while preserving the sortedness. 85 | binarySearchLBy :: (PrimMonad m, MVector v e) 86 | => Comparison e -> v (PrimState m) e -> e -> m Int 87 | binarySearchLBy cmp vec e = binarySearchLByBounds cmp vec e 0 (length vec) 88 | {-# INLINE binarySearchLBy #-} 89 | 90 | -- | Given a vector sorted with respect to a given comparison function on indices 91 | -- in [l,u), finds the lowest index in [l,u] at which the given element could be 92 | -- inserted while preserving sortedness. 93 | binarySearchLByBounds :: (PrimMonad m, MVector v e) 94 | => Comparison e -> v (PrimState m) e -> e -> Int -> Int -> m Int 95 | binarySearchLByBounds cmp vec e = binarySearchPBounds p vec 96 | where p e' = case cmp e' e of LT -> False ; _ -> True 97 | {-# INLINE binarySearchLByBounds #-} 98 | 99 | -- | Finds the greatest index in a given sorted vector at which the given element 100 | -- could be inserted while maintaining sortedness. 101 | binarySearchR :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> e -> m Int 102 | binarySearchR = binarySearchRBy compare 103 | {-# INLINE binarySearchR #-} 104 | 105 | -- | Finds the greatest index in a given vector, which must be sorted with respect to 106 | -- the given comparison function, at which the given element could be inserted 107 | -- while preserving the sortedness. 108 | binarySearchRBy :: (PrimMonad m, MVector v e) 109 | => Comparison e -> v (PrimState m) e -> e -> m Int 110 | binarySearchRBy cmp vec e = binarySearchRByBounds cmp vec e 0 (length vec) 111 | {-# INLINE binarySearchRBy #-} 112 | 113 | -- | Given a vector sorted with respect to the given comparison function on indices 114 | -- in [l,u), finds the greatest index in [l,u] at which the given element could be 115 | -- inserted while preserving sortedness. 116 | binarySearchRByBounds :: (PrimMonad m, MVector v e) 117 | => Comparison e -> v (PrimState m) e -> e -> Int -> Int -> m Int 118 | binarySearchRByBounds cmp vec e = binarySearchPBounds p vec 119 | where p e' = case cmp e' e of GT -> True ; _ -> False 120 | {-# INLINE binarySearchRByBounds #-} 121 | 122 | -- | Given a predicate that is guaranteed to be monotone on the given vector, 123 | -- finds the first index at which the predicate returns True, or the length of 124 | -- the array if the predicate is false for the entire array. 125 | binarySearchP :: (PrimMonad m, MVector v e) => (e -> Bool) -> v (PrimState m) e -> m Int 126 | binarySearchP p vec = binarySearchPBounds p vec 0 (length vec) 127 | {-# INLINE binarySearchP #-} 128 | 129 | -- | Given a predicate that is guaranteed to be monotone on the indices [l,u) in 130 | -- a given vector, finds the index in [l,u] at which the predicate turns from 131 | -- False to True (yielding u if the entire interval is False). 132 | binarySearchPBounds :: (PrimMonad m, MVector v e) 133 | => (e -> Bool) -> v (PrimState m) e -> Int -> Int -> m Int 134 | binarySearchPBounds p vec = loop 135 | where 136 | loop !l !u 137 | | u <= l = return l 138 | | otherwise = unsafeRead vec k >>= \e -> if p e then loop l k else loop (k+1) u 139 | where k = midPoint u l 140 | {-# INLINE binarySearchPBounds #-} 141 | 142 | -- | Given a predicate that is guaranteed to be monotone on the vector elements 143 | -- in order, finds the index at which the predicate turns from False to True. 144 | -- The length of the vector is returned if the predicate is False for the entire 145 | -- vector. 146 | -- 147 | -- Begins searching at the start of the vector, in increasing steps of size 2^n. 148 | gallopingSearchLeftP 149 | :: (PrimMonad m, MVector v e) => (e -> Bool) -> v (PrimState m) e -> m Int 150 | gallopingSearchLeftP p vec = gallopingSearchLeftPBounds p vec 0 (length vec) 151 | {-# INLINE gallopingSearchLeftP #-} 152 | 153 | -- | Given a predicate that is guaranteed to be monotone on the vector elements 154 | -- in order, finds the index at which the predicate turns from False to True. 155 | -- The length of the vector is returned if the predicate is False for the entire 156 | -- vector. 157 | -- 158 | -- Begins searching at the end of the vector, in increasing steps of size 2^n. 159 | gallopingSearchRightP 160 | :: (PrimMonad m, MVector v e) => (e -> Bool) -> v (PrimState m) e -> m Int 161 | gallopingSearchRightP p vec = gallopingSearchRightPBounds p vec 0 (length vec) 162 | {-# INLINE gallopingSearchRightP #-} 163 | 164 | -- | Given a predicate that is guaranteed to be monotone on the indices [l,u) in 165 | -- a given vector, finds the index in [l,u] at which the predicate turns from 166 | -- False to True (yielding u if the entire interval is False). 167 | -- Begins searching at l, going right in increasing (2^n)-steps. 168 | gallopingSearchLeftPBounds :: (PrimMonad m, MVector v e) 169 | => (e -> Bool) 170 | -> v (PrimState m) e 171 | -> Int -- ^ l 172 | -> Int -- ^ u 173 | -> m Int 174 | gallopingSearchLeftPBounds p vec l u 175 | | u <= l = return l 176 | | otherwise = do x <- unsafeRead vec l 177 | if p x then return l else iter (l+1) l 2 178 | where 179 | binSearch = binarySearchPBounds p vec 180 | iter !i !j !_stepSize | i >= u - 1 = do 181 | x <- unsafeRead vec (u-1) 182 | if p x then binSearch (j+1) (u-1) else return u 183 | iter !i !j !stepSize = do 184 | x <- unsafeRead vec i 185 | if p x then binSearch (j+1) i else iter (i+stepSize) i (2*stepSize) 186 | {-# INLINE gallopingSearchLeftPBounds #-} 187 | 188 | -- | Given a predicate that is guaranteed to be monotone on the indices [l,u) in 189 | -- a given vector, finds the index in [l,u] at which the predicate turns from 190 | -- False to True (yielding u if the entire interval is False). 191 | -- Begins searching at u, going left in increasing (2^n)-steps. 192 | gallopingSearchRightPBounds :: (PrimMonad m, MVector v e) 193 | => (e -> Bool) 194 | -> v (PrimState m) e 195 | -> Int -- ^ l 196 | -> Int -- ^ u 197 | -> m Int 198 | gallopingSearchRightPBounds p vec l u 199 | | u <= l = return l 200 | | otherwise = iter (u-1) (u-1) (-1) 201 | where 202 | binSearch = binarySearchPBounds p vec 203 | iter !i !j !_stepSize | i <= l = do 204 | x <- unsafeRead vec l 205 | if p x then return l else binSearch (l+1) j 206 | iter !i !j !stepSize = do 207 | x <- unsafeRead vec i 208 | if p x then iter (i+stepSize) i (2*stepSize) else binSearch (i+1) j 209 | {-# INLINE gallopingSearchRightPBounds #-} 210 | -------------------------------------------------------------------------------- /src/Data/Vector/Algorithms/Tim.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | 3 | -- --------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Data.Vector.Algorithms.Tim 6 | -- Copyright : (c) 2013-2015 Dan Doel, 2015 Tim Baumann 7 | -- Maintainer : Dan Doel 8 | -- Stability : Experimental 9 | -- Portability : Non-portable (bang patterns) 10 | -- 11 | -- Timsort is a complex, adaptive, bottom-up merge sort. It is designed to 12 | -- minimize comparisons as much as possible, even at some cost in overhead. 13 | -- Thus, it may not be ideal for sorting simple primitive types, for which 14 | -- comparison is cheap. It may, however, be significantly faster for sorting 15 | -- arrays of complex values (strings would be an example, though an algorithm 16 | -- not based on comparison would probably be superior in that particular 17 | -- case). 18 | -- 19 | -- For more information on the details of the algorithm, read on. 20 | -- 21 | -- The first step of the algorithm is to identify runs of elements. These can 22 | -- either be non-decreasing or strictly decreasing sequences of elements in 23 | -- the input. Strictly decreasing sequences are used rather than 24 | -- non-increasing so that they can be easily reversed in place without the 25 | -- sort becoming unstable. 26 | -- 27 | -- If the natural runs are too short, they are padded to a minimum value. The 28 | -- minimum is chosen based on the length of the array, and padded runs are put 29 | -- in order using insertion sort. The length of the minimum run size is 30 | -- determined as follows: 31 | -- 32 | -- * If the length of the array is less than 64, the minimum size is the 33 | -- length of the array, and insertion sort is used for the entirety 34 | -- 35 | -- * Otherwise, a value between 32 and 64 is chosen such that N/min is 36 | -- either equal to or just below a power of two. This avoids having a 37 | -- small chunk left over to merge into much larger chunks at the end. 38 | -- 39 | -- This is accomplished by taking the the mininum to be the lowest six bits 40 | -- containing the highest set bit, and adding one if any other bits are set. 41 | -- For instance: 42 | -- 43 | -- length: 00000000 00000000 00000000 00011011 = 25 44 | -- min: 00000000 00000000 00000000 00011011 = 25 45 | -- 46 | -- length: 00000000 11111100 00000000 00000000 = 63 * 2^18 47 | -- min: 00000000 00000000 00000000 00111111 = 63 48 | -- 49 | -- length: 00000000 11111100 00000000 00000001 = 63 * 2^18 + 1 50 | -- min: 00000000 00000000 00000000 01000000 = 64 51 | -- 52 | -- Once chunks can be produced, the next step is merging them. The indices of 53 | -- all runs are stored in a stack. When we identify a new run, we push it onto 54 | -- the stack. However, certain invariants are maintained of the stack entries. 55 | -- Namely: 56 | -- 57 | -- if stk = _ :> z :> y :> x 58 | -- length x + length y < length z 59 | -- 60 | -- if stk = _ :> y :> x 61 | -- length x < length y 62 | -- 63 | -- This ensures that the chunks stored are decreasing, and that the chunk 64 | -- sizes follow something like the fibonacci sequence, ensuring there at most 65 | -- log-many chunks at any time. If pushing a new chunk on the stack would 66 | -- violate either of the invariants, we first perform a merge. 67 | -- 68 | -- If length x + length y >= length z, then y is merged with the smaller of x 69 | -- and z (if they are tied, x is chosen, because it is more likely to be 70 | -- cached). If, further, length x >= length y then they are merged. These steps 71 | -- are repeated until the invariants are established. 72 | -- 73 | -- The last important piece of the algorithm is the merging. At first, two 74 | -- chunks are merged element-wise. However, while doing so, counts are kept of 75 | -- the number of elements taken from one chunk without any from its partner. If 76 | -- this count exceeds a threshold, the merge switches to searching for elements 77 | -- from one chunk in the other, and copying chunks at a time. If these chunks 78 | -- start falling below the threshold, the merge switches back to element-wise. 79 | -- 80 | -- The search used in the merge is also special. It uses a galloping strategy, 81 | -- where exponentially increasing indices are tested, and once two such indices 82 | -- are determined to bracket the desired value, binary search is used to find 83 | -- the exact index within that range. This is asymptotically the same as simply 84 | -- using binary search, but is likely to do fewer comparisons than binary search 85 | -- would. 86 | -- 87 | -- One aspect that is not yet implemented from the original Tim sort is the 88 | -- adjustment of the above threshold. When galloping saves time, the threshold 89 | -- is lowered, and when it doesn't, it is raised. This may be implemented in the 90 | -- future. 91 | 92 | module Data.Vector.Algorithms.Tim 93 | ( sort 94 | , sortUniq 95 | , sortBy 96 | , sortUniqBy 97 | ) where 98 | 99 | import Prelude hiding (length, reverse) 100 | 101 | import Control.Monad.Primitive 102 | import Control.Monad (when) 103 | import Data.Bits 104 | 105 | import Data.Vector.Generic.Mutable 106 | 107 | import Data.Vector.Algorithms.Search ( gallopingSearchRightPBounds 108 | , gallopingSearchLeftPBounds 109 | ) 110 | import Data.Vector.Algorithms.Insertion (sortByBounds', Comparison) 111 | import Data.Vector.Algorithms.Common (uniqueMutableBy) 112 | 113 | -- | Sorts an array using the default comparison. 114 | sort :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> m () 115 | sort = sortBy compare 116 | {-# INLINE sort #-} 117 | 118 | -- | A variant on `sort` that returns a vector of unique elements. 119 | sortUniq :: (PrimMonad m, MVector v e, Ord e) => v (PrimState m) e -> m (v (PrimState m) e) 120 | sortUniq = sortUniqBy compare 121 | {-# INLINE sortUniq #-} 122 | 123 | -- | Sorts an array using a custom comparison. 124 | sortBy :: (PrimMonad m, MVector v e) 125 | => Comparison e -> v (PrimState m) e -> m () 126 | sortBy cmp vec 127 | | mr == len = iter [0] 0 (error "no merge buffer needed!") 128 | | otherwise = new 256 >>= iter [] 0 129 | where 130 | len = length vec 131 | mr = minrun len 132 | iter s i tmpBuf 133 | | i >= len = performRemainingMerges s tmpBuf 134 | | otherwise = do (order, runLen) <- nextRun cmp vec i len 135 | when (order == Descending) $ 136 | reverse $ unsafeSlice i runLen vec 137 | let runEnd = min len (i + max runLen mr) 138 | sortByBounds' cmp vec i (i+runLen) runEnd 139 | (s', tmpBuf') <- performMerges (i : s) runEnd tmpBuf 140 | iter s' runEnd tmpBuf' 141 | runLengthInvariantBroken a b c i = (b - a <= i - b) || (c - b <= i - c) 142 | performMerges [b,a] i tmpBuf 143 | | i - b >= b - a = merge cmp vec a b i tmpBuf >>= performMerges [a] i 144 | performMerges (c:b:a:ss) i tmpBuf 145 | | runLengthInvariantBroken a b c i = 146 | if i - c <= b - a 147 | then merge cmp vec b c i tmpBuf >>= performMerges (b:a:ss) i 148 | else do tmpBuf' <- merge cmp vec a b c tmpBuf 149 | (ass', tmpBuf'') <- performMerges (a:ss) c tmpBuf' 150 | performMerges (c:ass') i tmpBuf'' 151 | performMerges s _ tmpBuf = return (s, tmpBuf) 152 | performRemainingMerges (b:a:ss) tmpBuf = 153 | merge cmp vec a b len tmpBuf >>= performRemainingMerges (a:ss) 154 | performRemainingMerges _ _ = return () 155 | {-# INLINE sortBy #-} 156 | 157 | -- | A variant on `sortBy` which returns a vector of unique elements. 158 | sortUniqBy :: (PrimMonad m, MVector v e) 159 | => Comparison e -> v (PrimState m) e -> m (v (PrimState m) e) 160 | sortUniqBy cmp vec = do 161 | sortBy cmp vec 162 | uniqueMutableBy cmp vec 163 | {-# INLINE sortUniqBy #-} 164 | 165 | -- | Computes the minimum run size for the sort. The goal is to choose a size 166 | -- such that there are almost if not exactly 2^n chunks of that size in the 167 | -- array. 168 | minrun :: Int -> Int 169 | minrun n0 = (n0 `unsafeShiftR` extra) + if (lowMask .&. n0) > 0 then 1 else 0 170 | where 171 | -- smear the bits down from the most significant bit 172 | !n1 = n0 .|. unsafeShiftR n0 1 173 | !n2 = n1 .|. unsafeShiftR n1 2 174 | !n3 = n2 .|. unsafeShiftR n2 4 175 | !n4 = n3 .|. unsafeShiftR n3 8 176 | !n5 = n4 .|. unsafeShiftR n4 16 177 | !n6 = n5 .|. unsafeShiftR n5 32 178 | 179 | -- mask for the bits lower than the 6 highest bits 180 | !lowMask = n6 `unsafeShiftR` 6 181 | 182 | !extra = popCount lowMask 183 | {-# INLINE minrun #-} 184 | 185 | data Order = Ascending | Descending deriving (Eq, Show) 186 | 187 | -- | Identify the next run (that is a monotonically increasing or strictly 188 | -- decreasing sequence) in the slice [l,u) in vec. Returns the order and length 189 | -- of the run. 190 | nextRun :: (PrimMonad m, MVector v e) 191 | => Comparison e 192 | -> v (PrimState m) e 193 | -> Int -- ^ l 194 | -> Int -- ^ u 195 | -> m (Order, Int) 196 | nextRun _ _ i len | i+1 >= len = return (Ascending, 1) 197 | nextRun cmp vec i len = do x <- unsafeRead vec i 198 | y <- unsafeRead vec (i+1) 199 | if x `gt` y then desc y 2 else asc y 2 200 | where 201 | gt a b = cmp a b == GT 202 | desc _ !k | i + k >= len = return (Descending, k) 203 | desc x !k = do y <- unsafeRead vec (i+k) 204 | if x `gt` y then desc y (k+1) else return (Descending, k) 205 | asc _ !k | i + k >= len = return (Ascending, k) 206 | asc x !k = do y <- unsafeRead vec (i+k) 207 | if x `gt` y then return (Ascending, k) else asc y (k+1) 208 | {-# INLINE nextRun #-} 209 | 210 | -- | Tests if a temporary buffer has a given size. If not, allocates a new 211 | -- buffer and returns it instead of the old temporary buffer. 212 | ensureCapacity :: (PrimMonad m, MVector v e) 213 | => Int -> v (PrimState m) e -> m (v (PrimState m) e) 214 | ensureCapacity l tmpBuf 215 | | l <= length tmpBuf = return tmpBuf 216 | | otherwise = new (2*l) 217 | {-# INLINE ensureCapacity #-} 218 | 219 | -- | Copy the slice [i,i+len) from vec to tmpBuf. If tmpBuf is not large enough, 220 | -- a new buffer is allocated and used. Returns the buffer. 221 | cloneSlice :: (PrimMonad m, MVector v e) 222 | => Int -- ^ i 223 | -> Int -- ^ len 224 | -> v (PrimState m) e -- ^ vec 225 | -> v (PrimState m) e -- ^ tmpBuf 226 | -> m (v (PrimState m) e) 227 | cloneSlice i len vec tmpBuf = do 228 | tmpBuf' <- ensureCapacity len tmpBuf 229 | unsafeCopy (unsafeSlice 0 len tmpBuf') (unsafeSlice i len vec) 230 | return tmpBuf' 231 | {-# INLINE cloneSlice #-} 232 | 233 | -- | Number of consecutive times merge chooses the element from the same run 234 | -- before galloping mode is activated. 235 | minGallop :: Int 236 | minGallop = 7 237 | {-# INLINE minGallop #-} 238 | 239 | -- | Merge the adjacent sorted slices [l,m) and [m,u) in vec. This is done by 240 | -- copying the slice [l,m) to a temporary buffer. Returns the (enlarged) 241 | -- temporary buffer. 242 | mergeLo :: (PrimMonad m, MVector v e) 243 | => Comparison e 244 | -> v (PrimState m) e -- ^ vec 245 | -> Int -- ^ l 246 | -> Int -- ^ m 247 | -> Int -- ^ u 248 | -> v (PrimState m) e -- ^ tmpBuf 249 | -> m (v (PrimState m) e) 250 | mergeLo cmp vec l m u tempBuf' = do 251 | tmpBuf <- cloneSlice l tmpBufLen vec tempBuf' 252 | vi <- unsafeRead tmpBuf 0 253 | vj <- unsafeRead vec m 254 | iter tmpBuf 0 m l vi vj minGallop minGallop 255 | return tmpBuf 256 | where 257 | gt a b = cmp a b == GT 258 | gte a b = cmp a b /= LT 259 | tmpBufLen = m - l 260 | 261 | finalize tmpBuf i k = do 262 | let from = unsafeSlice i (tmpBufLen-i) tmpBuf 263 | to = unsafeSlice k (tmpBufLen-i) vec 264 | unsafeCopy to from 265 | 266 | iter _ i _ _ _ _ _ _ | i >= tmpBufLen = return () 267 | iter tmpBuf i j k _ _ _ _ | j >= u = finalize tmpBuf i k 268 | iter tmpBuf i j k _ vj 0 _ = do 269 | i' <- gallopingSearchLeftPBounds (`gt` vj) tmpBuf i tmpBufLen 270 | let gallopLen = i' - i 271 | from = unsafeSlice i gallopLen tmpBuf 272 | to = unsafeSlice k gallopLen vec 273 | unsafeCopy to from 274 | when (i' < tmpBufLen) $ do 275 | vi' <- unsafeRead tmpBuf i' 276 | iter tmpBuf i' j (k+gallopLen) vi' vj minGallop minGallop 277 | iter tmpBuf i j k vi _ _ 0 = do 278 | j' <- gallopingSearchLeftPBounds (`gte` vi) vec j u 279 | let gallopLen = j' - j 280 | from = slice j gallopLen vec 281 | to = slice k gallopLen vec 282 | unsafeMove to from 283 | if j' >= u then finalize tmpBuf i (k + gallopLen) else do 284 | vj' <- unsafeRead vec j' 285 | iter tmpBuf i j' (k+gallopLen) vi vj' minGallop minGallop 286 | iter tmpBuf i j k vi vj ga gb 287 | | vj `gte` vi = do unsafeWrite vec k vi 288 | when (i + 1 < tmpBufLen) $ do 289 | vi' <- unsafeRead tmpBuf (i+1) 290 | iter tmpBuf (i+1) j (k+1) vi' vj (ga-1) minGallop 291 | | otherwise = do unsafeWrite vec k vj 292 | if j + 1 >= u then finalize tmpBuf i (k + 1) else do 293 | vj' <- unsafeRead vec (j+1) 294 | iter tmpBuf i (j+1) (k+1) vi vj' minGallop (gb-1) 295 | {-# INLINE mergeLo #-} 296 | 297 | -- | Merge the adjacent sorted slices [l,m) and [m,u) in vec. This is done by 298 | -- copying the slice [j,k) to a temporary buffer. Returns the (enlarged) 299 | -- temporary buffer. 300 | mergeHi :: (PrimMonad m, MVector v e) 301 | => Comparison e 302 | -> v (PrimState m) e -- ^ vec 303 | -> Int -- ^ l 304 | -> Int -- ^ m 305 | -> Int -- ^ u 306 | -> v (PrimState m) e -- ^ tmpBuf 307 | -> m (v (PrimState m) e) 308 | mergeHi cmp vec l m u tmpBuf' = do 309 | tmpBuf <- cloneSlice m tmpBufLen vec tmpBuf' 310 | vi <- unsafeRead vec (m-1) 311 | vj <- unsafeRead tmpBuf (tmpBufLen-1) 312 | iter tmpBuf (m-1) (tmpBufLen-1) (u-1) vi vj minGallop minGallop 313 | return tmpBuf 314 | where 315 | gt a b = cmp a b == GT 316 | gte a b = cmp a b /= LT 317 | tmpBufLen = u - m 318 | 319 | finalize tmpBuf j = do 320 | let from = unsafeSlice 0 (j+1) tmpBuf 321 | to = unsafeSlice l (j+1) vec 322 | unsafeCopy to from 323 | 324 | iter _ _ j _ _ _ _ _ | j < 0 = return () 325 | iter tmpBuf i j _ _ _ _ _ | i < l = finalize tmpBuf j 326 | iter tmpBuf i j k _ vj 0 _ = do 327 | i' <- gallopingSearchRightPBounds (`gt` vj) vec l i 328 | let gallopLen = i - i' 329 | from = slice (i'+1) gallopLen vec 330 | to = slice (k-gallopLen+1) gallopLen vec 331 | unsafeMove to from 332 | if i' < l then finalize tmpBuf j else do 333 | vi' <- unsafeRead vec i' 334 | iter tmpBuf i' j (k-gallopLen) vi' vj minGallop minGallop 335 | iter tmpBuf i j k vi _ _ 0 = do 336 | j' <- gallopingSearchRightPBounds (`gte` vi) tmpBuf 0 j 337 | let gallopLen = j - j' 338 | from = slice (j'+1) gallopLen tmpBuf 339 | to = slice (k-gallopLen+1) gallopLen vec 340 | unsafeCopy to from 341 | when (j' >= 0) $ do 342 | vj' <- unsafeRead tmpBuf j' 343 | iter tmpBuf i j' (k-gallopLen) vi vj' minGallop minGallop 344 | iter tmpBuf i j k vi vj ga gb 345 | | vi `gt` vj = do unsafeWrite vec k vi 346 | if i - 1 < l then finalize tmpBuf j else do 347 | vi' <- unsafeRead vec (i-1) 348 | iter tmpBuf (i-1) j (k-1) vi' vj (ga-1) minGallop 349 | | otherwise = do unsafeWrite vec k vj 350 | when (j - 1 >= 0) $ do 351 | vj' <- unsafeRead tmpBuf (j-1) 352 | iter tmpBuf i (j-1) (k-1) vi vj' minGallop (gb-1) 353 | {-# INLINE mergeHi #-} 354 | 355 | -- | Merge the adjacent sorted slices A=[l,m) and B=[m,u) in vec. This begins 356 | -- with galloping searches to find the index of vec[m] in A and the index of 357 | -- vec[m-1] in B to reduce the sizes of A and B. Then it uses `mergeHi` or 358 | -- `mergeLo` depending on whether A or B is larger. Returns the (enlarged) 359 | -- temporary buffer. 360 | merge :: (PrimMonad m, MVector v e) 361 | => Comparison e 362 | -> v (PrimState m) e -- ^ vec 363 | -> Int -- ^ l 364 | -> Int -- ^ m 365 | -> Int -- ^ u 366 | -> v (PrimState m) e -- ^ tmpBuf 367 | -> m (v (PrimState m) e) 368 | merge cmp vec l m u tmpBuf = do 369 | vm <- unsafeRead vec m 370 | l' <- gallopingSearchLeftPBounds (`gt` vm) vec l m 371 | if l' >= m 372 | then return tmpBuf 373 | else do 374 | vn <- unsafeRead vec (m-1) 375 | u' <- gallopingSearchRightPBounds (`gte` vn) vec m u 376 | if u' <= m 377 | then return tmpBuf 378 | else (if (m-l') <= (u'-m) then mergeLo else mergeHi) cmp vec l' m u' tmpBuf 379 | where 380 | gt a b = cmp a b == GT 381 | gte a b = cmp a b /= LT 382 | {-# INLINE merge #-} 383 | -------------------------------------------------------------------------------- /tests/properties/Optimal.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TypeOperators, FlexibleContexts #-} 2 | 3 | -- Exhaustive test sets for proper sorting and stability of 4 | -- optimal sorts 5 | 6 | module Optimal where 7 | 8 | import Control.Arrow 9 | import Control.Monad 10 | 11 | import qualified Data.List as List 12 | import Data.Function 13 | 14 | import Data.Vector.Generic hiding (map, zip, concatMap, (++), replicate, foldM) 15 | 16 | interleavings :: [a] -> [a] -> [[a]] 17 | interleavings [ ] ys = [ys] 18 | interleavings xs [ ] = [xs] 19 | interleavings xs@(x:xt) ys@(y:yt) = map (x:) (interleavings xt ys) 20 | ++ map (y:) (interleavings xs yt) 21 | 22 | monotones :: Int -> Int -> [[Int]] 23 | monotones k = atLeastOne 0 24 | where 25 | atLeastOne i 0 = [[]] 26 | atLeastOne i n = map (i:) $ picks i (n-1) 27 | picks _ 0 = [[]] 28 | picks i n | i >= k = [replicate n k] 29 | | otherwise = map (i:) (picks i (n-1)) ++ atLeastOne (i+1) n 30 | 31 | 32 | stability :: (Vector v (Int,Int)) => Int -> [v (Int, Int)] 33 | stability n = concatMap ( map fromList 34 | . foldM interleavings [] 35 | . List.groupBy ((==) `on` fst) 36 | . flip zip [0..]) 37 | $ monotones (n-2) n 38 | 39 | sort2 :: (Vector v Int) => [v Int] 40 | sort2 = map fromList $ List.permutations [0,1] 41 | 42 | stability2 :: (Vector v (Int,Int)) => [v (Int, Int)] 43 | stability2 = [fromList [(0, 0), (0, 1)]] 44 | 45 | sort3 :: (Vector v Int) => [v Int] 46 | sort3 = map fromList $ List.permutations [0..2] 47 | 48 | {- 49 | stability3 :: [UArr (Int :*: Int)] 50 | stability3 = map toU [ [0:*:0, 0:*:1, 0:*:2] 51 | , [0:*:0, 0:*:1, 1:*:2] 52 | , [0:*:0, 1:*:2, 0:*:1] 53 | , [1:*:2, 0:*:0, 0:*:1] 54 | , [0:*:0, 1:*:1, 1:*:2] 55 | , [1:*:1, 0:*:0, 1:*:2] 56 | , [1:*:1, 1:*:2, 0:*:0] 57 | ] 58 | -} 59 | 60 | sort4 :: (Vector v Int) => [v Int] 61 | sort4 = map fromList $ List.permutations [0..3] 62 | 63 | -------------------------------------------------------------------------------- /tests/properties/Properties.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | {-# LANGUAGE GADTs #-} 3 | {-# LANGUAGE RankNTypes #-} 4 | {-# LANGUAGE TypeOperators #-} 5 | 6 | module Properties where 7 | 8 | import Prelude 9 | 10 | import Optimal 11 | 12 | import Control.Monad 13 | import Control.Monad.ST 14 | 15 | import Data.List 16 | import Data.Ord 17 | 18 | import Data.Vector (Vector) 19 | import qualified Data.Vector as V 20 | 21 | import Data.Vector.Mutable (MVector) 22 | import qualified Data.Vector.Mutable as MV 23 | 24 | import Data.Vector.Generic (modify) 25 | 26 | import qualified Data.Vector.Generic.Mutable as G 27 | import qualified Data.Vector.Generic as GV 28 | 29 | import Data.Vector.Algorithms.Optimal (Comparison) 30 | import Data.Vector.Algorithms.Radix (radix, passes, size) 31 | import qualified Data.Vector.Algorithms as Alg 32 | 33 | import qualified Data.Map as M 34 | 35 | import Test.QuickCheck hiding (Sorted) 36 | 37 | import Util 38 | 39 | prop_sorted :: (Ord e) => Vector e -> Property 40 | prop_sorted arr | V.length arr < 2 = property True 41 | | otherwise = check (V.head arr) (V.tail arr) 42 | where 43 | check e arr | V.null arr = property True 44 | | otherwise = e <= V.head arr .&. check (V.head arr) (V.tail arr) 45 | 46 | prop_sorted_uniq :: (Ord e) => Vector e -> Property 47 | prop_sorted_uniq arr | V.length arr < 2 = property True 48 | | otherwise = check (V.head arr) (V.tail arr) 49 | where 50 | check e arr | V.null arr = property True 51 | | otherwise = e < V.head arr .&. check (V.head arr) (V.tail arr) 52 | 53 | prop_empty :: (Ord e) => (forall s. MV.MVector s e -> ST s ()) -> Property 54 | prop_empty algo = prop_sorted (modify algo $ V.fromList []) 55 | 56 | prop_fullsort :: (Ord e) 57 | => (forall s mv. G.MVector mv e => mv s e -> ST s ()) -> Vector e -> Property 58 | prop_fullsort algo arr = prop_sorted $ modify algo arr 59 | 60 | runFreeze 61 | :: forall e . (Ord e) 62 | => (forall s mv . G.MVector mv e => mv s e -> ST s (mv s e)) 63 | -> (forall s v mv. (GV.Vector v e, mv ~ GV.Mutable v) => mv s e -> ST s (v e)) 64 | runFreeze alg mv = do 65 | mv <- alg mv 66 | GV.unsafeFreeze mv 67 | 68 | prop_full_sortUniq 69 | :: (Ord e, Show e) 70 | => (forall s . MV.MVector s e -> ST s (Vector e)) 71 | -> Vector e -> Property 72 | prop_full_sortUniq algo arr = runST $ do 73 | mv <- V.unsafeThaw arr 74 | arr' <- algo mv 75 | pure (prop_sorted_uniq arr') 76 | 77 | {- 78 | prop_schwartzian :: (UA e, UA k, Ord k) 79 | => (e -> k) 80 | -> (forall e s. (UA e) => (e -> e -> Ordering) -> MUArr e s -> ST s ()) 81 | -> UArr e -> Property 82 | prop_schwartzian f algo arr 83 | | lengthU arr < 2 = property True 84 | | otherwise = let srt = modify (algo `usingKeys` f) arr 85 | in check (headU srt) (tailU srt) 86 | where 87 | check e arr | nullU arr = property True 88 | | otherwise = f e <= f (headU arr) .&. check (headU arr) (tailU arr) 89 | -} 90 | 91 | longGen :: (Arbitrary e) => Int -> Gen (Vector e) 92 | longGen k = liftM2 (\l r -> V.fromList (l ++ r)) (vectorOf k arbitrary) arbitrary 93 | 94 | sanity :: Int 95 | sanity = 100 96 | 97 | prop_partialsort :: (Ord e, Arbitrary e, Show e) 98 | => (forall s mv. G.MVector mv e => mv s e -> Int -> ST s ()) 99 | -> Positive Int -> Property 100 | prop_partialsort = prop_sized $ \algo k v -> do 101 | let newVec = modify algo v 102 | vhead = V.take k newVec 103 | vtail = V.drop k newVec 104 | prop_sorted vhead 105 | .&&. 106 | -- Every element in the head should be < every element in the tail. 107 | if V.null vtail then 1 == 1 else V.maximum vhead <= V.minimum vtail 108 | 109 | prop_sized_empty :: (Ord e) => (forall s. MV.MVector s e -> Int -> ST s ()) -> Property 110 | prop_sized_empty algo = prop_empty (flip algo 0) .&&. prop_empty (flip algo 10) 111 | 112 | prop_select :: (Ord e, Arbitrary e, Show e) 113 | => (forall s mv. G.MVector mv e => mv s e -> Int -> ST s ()) 114 | -> Positive Int -> Property 115 | prop_select = prop_sized $ \algo k arr -> 116 | let vec' = modify algo arr 117 | l = V.slice 0 k vec' 118 | r = V.slice k (V.length vec' - k) vec' 119 | in V.all (\e -> V.all (e <=) r) l 120 | 121 | prop_sized :: (Arbitrary e, Show e, Testable prop) 122 | => ((forall s mv. G.MVector mv e => mv s e -> ST s ()) 123 | -> Int -> Vector e -> prop) 124 | -> (forall s mv. G.MVector mv e => mv s e -> Int -> ST s ()) 125 | -> Positive Int -> Property 126 | prop_sized prop algo (Positive k) = 127 | let k' = k `mod` sanity 128 | in forAll (longGen k') $ prop (\marr -> algo marr k') k' 129 | 130 | prop_stable :: (forall e s mv. G.MVector mv e => Comparison e -> mv s e -> ST s ()) 131 | -> Vector Int -> Property 132 | -- prop_stable algo arr = property $ modify algo arr == arr 133 | prop_stable algo arr = stable $ modify (algo (comparing fst)) $ V.zip arr ix 134 | where 135 | ix = V.fromList [1 .. V.length arr] 136 | 137 | stable arr | V.null arr = property True 138 | | otherwise = let (e, i) = V.head arr 139 | in V.all (\(e', i') -> e < e' || i < i') (V.tail arr) 140 | .&. stable (V.tail arr) 141 | 142 | prop_stable_radix :: (forall e s mv. G.MVector mv e => Int -> Int -> (Int -> e -> Int) 143 | -> mv s e -> ST s ()) 144 | -> Vector Int -> Property 145 | prop_stable_radix algo arr = 146 | stable . modify (algo (passes e) (size e) (\k (e, _) -> radix k e)) 147 | $ V.zip arr ix 148 | where 149 | ix = V.fromList [1 .. V.length arr] 150 | e = V.head arr 151 | 152 | prop_optimal :: Int 153 | -> (forall e s mv. G.MVector mv e => Comparison e -> mv s e -> Int -> ST s ()) 154 | -> Property 155 | prop_optimal n algo = label "sorting" sortn .&. label "stability" stabn 156 | where 157 | arrn = V.fromList [0..n-1] 158 | sortn = all ( (== arrn) 159 | . modify (\a -> algo compare a 0) 160 | . V.fromList) 161 | $ permutations [0..n-1] 162 | stabn = all ( (== arrn) 163 | . snd 164 | . V.unzip 165 | . modify (\a -> algo (comparing fst) a 0)) 166 | $ stability n 167 | 168 | type Bag e = M.Map e Int 169 | 170 | toBag :: (Ord e) => Vector e -> Bag e 171 | toBag = M.fromListWith (+) . flip zip (repeat 1) . V.toList 172 | 173 | prop_permutation :: (Ord e) => (forall s mv. G.MVector mv e => mv s e -> ST s ()) 174 | -> Vector e -> Property 175 | prop_permutation algo arr = property $ 176 | toBag arr == toBag (modify algo arr) 177 | 178 | newtype SortedVec e = Sorted (Vector e) 179 | 180 | instance (Show e) => Show (SortedVec e) where 181 | show (Sorted a) = show a 182 | 183 | instance (Arbitrary e, Ord e) => Arbitrary (SortedVec e) where 184 | arbitrary = fmap (Sorted . V.fromList . sort) 185 | $ liftM2 (++) (vectorOf 20 arbitrary) arbitrary 186 | 187 | ixRanges :: Vector e -> Gen (Int, Int) 188 | ixRanges vec = do i <- fmap (`mod` len) arbitrary 189 | j <- fmap (`mod` len) arbitrary 190 | return $ if i < j then (i, j) else (j, i) 191 | where len = V.length vec 192 | 193 | prop_search_inrange :: (Ord e) 194 | => (forall s. MVector s e -> e -> Int -> Int -> ST s Int) 195 | -> SortedVec e -> e -> Property 196 | prop_search_inrange algo (Sorted arr) e = forAll (ixRanges arr) $ \(i, j) -> 197 | let k = runST (mfromList (V.toList arr) >>= \marr -> algo marr e i j) 198 | in property $ i <= k && k <= j 199 | where 200 | len = V.length arr 201 | 202 | prop_search_insert :: (e -> e -> Bool) -> (e -> e -> Bool) 203 | -> (forall s. MVector s e -> e -> ST s Int) 204 | -> SortedVec e -> e -> Property 205 | prop_search_insert lo hi algo (Sorted arr) e = 206 | property $ (k == 0 || (arr V.! (k-1)) `lo` e) 207 | && (k == len || (arr V.! k) `hi` e) 208 | where 209 | len = V.length arr 210 | k = runST (mfromList (V.toList arr) >>= \marr -> algo marr e) 211 | 212 | prop_search_lowbound :: (Ord e) 213 | => (forall s. MVector s e -> e -> ST s Int) 214 | -> SortedVec e -> e -> Property 215 | prop_search_lowbound = prop_search_insert (<) (>=) 216 | 217 | prop_search_upbound :: (Ord e) 218 | => (forall s. MVector s e -> e -> ST s Int) 219 | -> SortedVec e -> e -> Property 220 | prop_search_upbound = prop_search_insert (<=) (>) 221 | 222 | prop_nub :: (Ord e, Show e) => Vector e -> Property 223 | prop_nub v = 224 | V.fromList (nub (V.toList v)) === Alg.nub v 225 | -------------------------------------------------------------------------------- /tests/properties/Tests.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE RankNTypes, TypeOperators, FlexibleContexts, TypeApplications #-} 2 | 3 | module Main (main) where 4 | 5 | import Properties 6 | 7 | import Util 8 | 9 | import Test.QuickCheck 10 | 11 | import Control.Monad 12 | import Control.Monad.ST 13 | 14 | import Data.Int 15 | import Data.Word 16 | 17 | import qualified Data.ByteString as B 18 | 19 | import Data.Vector (Vector) 20 | import qualified Data.Vector as V 21 | import qualified Data.Vector.Mutable as BoxedMV 22 | 23 | import qualified Data.Vector.Generic as G 24 | import Data.Vector.Generic.Mutable (MVector) 25 | import qualified Data.Vector.Generic.Mutable as MV 26 | 27 | import qualified Data.Vector.Algorithms.Insertion as INS 28 | import qualified Data.Vector.Algorithms.Intro as INT 29 | import qualified Data.Vector.Algorithms.Merge as M 30 | import qualified Data.Vector.Algorithms.Radix as R 31 | import qualified Data.Vector.Algorithms.Heap as H 32 | import qualified Data.Vector.Algorithms.Optimal as O 33 | import qualified Data.Vector.Algorithms.AmericanFlag as AF 34 | import qualified Data.Vector.Algorithms.Tim as T 35 | 36 | import qualified Data.Vector.Algorithms.Search as SR 37 | 38 | type Algo e r = forall s mv. MVector mv e => mv s e -> ST s r 39 | type SizeAlgo e r = forall s mv. MVector mv e => mv s e -> Int -> ST s r 40 | type BoundAlgo e r = forall s mv. MVector mv e => mv s e -> Int -> Int -> ST s r 41 | type MonoAlgo e r = forall s . BoxedMV.MVector s e -> ST s r 42 | 43 | newtype WrappedAlgo e r = WrapAlgo { unWrapAlgo :: Algo e r } 44 | newtype WrappedSizeAlgo e r = WrapSizeAlgo { unWrapSizeAlgo :: SizeAlgo e r } 45 | newtype WrappedBoundAlgo e r = WrapBoundAlgo { unWrapBoundAlgo :: BoundAlgo e r } 46 | newtype WrappedMonoAlgo e r = MonoAlgo { unWrapMonoAlgo :: MonoAlgo e r } 47 | 48 | args = stdArgs 49 | { maxSuccess = 1000 50 | , maxDiscardRatio = 2 51 | } 52 | 53 | check_Int_sort = forM_ algos $ \(name,algo) -> 54 | quickCheckWith args (label name . prop_fullsort (unWrapAlgo algo)) 55 | where 56 | algos :: [(String, WrappedAlgo Int ())] 57 | algos = [ ("introsort", WrapAlgo INT.sort) 58 | , ("insertion sort", WrapAlgo INS.sort) 59 | , ("merge sort", WrapAlgo M.sort) 60 | , ("heapsort", WrapAlgo H.sort) 61 | , ("timsort", WrapAlgo T.sort) 62 | ] 63 | 64 | check_Int_sortUniq = forM_ algos $ \(name,algo) -> 65 | quickCheckWith args (label name . prop_full_sortUniq (unWrapMonoAlgo algo)) 66 | where 67 | algos :: [(String, WrappedMonoAlgo Int (Vector Int))] 68 | algos = [ ("intro_sortUniq", MonoAlgo (runFreeze INT.sortUniq)) 69 | , ("insertion sortUniq", MonoAlgo (runFreeze INS.sortUniq)) 70 | , ("merge sortUniq", MonoAlgo (runFreeze M.sortUniq)) 71 | , ("heap_sortUniq", MonoAlgo (runFreeze H.sortUniq)) 72 | , ("tim_sortUniq", MonoAlgo (runFreeze T.sortUniq)) 73 | ] 74 | 75 | check_Int_partialsort = forM_ algos $ \(name,algo) -> 76 | quickCheckWith args (label name . prop_partialsort (unWrapSizeAlgo algo)) 77 | where 78 | algos :: [(String, WrappedSizeAlgo Int ())] 79 | algos = [ ("intro-partialsort", WrapSizeAlgo INT.partialSort) 80 | , ("heap partialsort", WrapSizeAlgo H.partialSort) 81 | ] 82 | 83 | check_Int_select = forM_ algos $ \(name,algo) -> 84 | quickCheckWith args (label name . prop_select (unWrapSizeAlgo algo)) 85 | where 86 | algos :: [(String, WrappedSizeAlgo Int ())] 87 | algos = [ ("intro-select", WrapSizeAlgo INT.select) 88 | , ("heap select", WrapSizeAlgo H.select) 89 | ] 90 | 91 | check_nub = quickCheckWith args (label "nub Int" . (prop_nub @Int)) 92 | 93 | 94 | check_radix_sorts = do 95 | qc (label "radix Word8" . prop_fullsort (R.sort :: Algo Word8 ())) 96 | qc (label "radix Word16" . prop_fullsort (R.sort :: Algo Word16 ())) 97 | qc (label "radix Word32" . prop_fullsort (R.sort :: Algo Word32 ())) 98 | qc (label "radix Word64" . prop_fullsort (R.sort :: Algo Word64 ())) 99 | qc (label "radix Word" . prop_fullsort (R.sort :: Algo Word ())) 100 | qc (label "radix Int8" . prop_fullsort (R.sort :: Algo Int8 ())) 101 | qc (label "radix Int16" . prop_fullsort (R.sort :: Algo Int16 ())) 102 | qc (label "radix Int32" . prop_fullsort (R.sort :: Algo Int32 ())) 103 | qc (label "radix Int64" . prop_fullsort (R.sort :: Algo Int64 ())) 104 | qc (label "radix Int" . prop_fullsort (R.sort :: Algo Int ())) 105 | qc (label "radix (Int, Int)" . prop_fullsort (R.sort :: Algo (Int, Int) ())) 106 | 107 | qc (label "flag Word8" . prop_fullsort (AF.sort :: Algo Word8 ())) 108 | qc (label "flag Word16" . prop_fullsort (AF.sort :: Algo Word16 ())) 109 | qc (label "flag Word32" . prop_fullsort (AF.sort :: Algo Word32 ())) 110 | qc (label "flag Word64" . prop_fullsort (AF.sort :: Algo Word64 ())) 111 | qc (label "flag Word" . prop_fullsort (AF.sort :: Algo Word ())) 112 | qc (label "flag Int8" . prop_fullsort (AF.sort :: Algo Int8 ())) 113 | qc (label "flag Int16" . prop_fullsort (AF.sort :: Algo Int16 ())) 114 | qc (label "flag Int32" . prop_fullsort (AF.sort :: Algo Int32 ())) 115 | qc (label "flag Int64" . prop_fullsort (AF.sort :: Algo Int64 ())) 116 | qc (label "flag Int" . prop_fullsort (AF.sort :: Algo Int ())) 117 | qc (label "flag ByteString" . prop_fullsort (AF.sort :: Algo B.ByteString ())) 118 | where 119 | qc algo = quickCheckWith args algo 120 | 121 | {- 122 | check_schwartzian = do 123 | quickCheckWith args (prop_schwartzian i2w INS.sortBy) 124 | where 125 | i2w :: Int -> Word 126 | i2w = fromIntegral 127 | -} 128 | 129 | check_stable = do quickCheckWith args (label "merge sort" . prop_stable M.sortBy) 130 | quickCheckWith args (label "radix sort" . prop_stable_radix R.sortBy) 131 | quickCheckWith args (label "tim sort" . prop_stable T.sortBy) 132 | 133 | 134 | check_optimal = do qc . label "size 2" $ prop_optimal 2 O.sort2ByOffset 135 | qc . label "size 3" $ prop_optimal 3 O.sort3ByOffset 136 | qc . label "size 4" $ prop_optimal 4 O.sort4ByOffset 137 | where 138 | qc = quickCheck 139 | 140 | check_permutation = do 141 | qc $ label "introsort" . prop_permutation (INT.sort :: Algo Int ()) 142 | qc $ label "heapsort" . prop_permutation (H.sort :: Algo Int ()) 143 | 144 | qc $ label "mergesort" . prop_permutation (M.sort :: Algo Int ()) 145 | qc $ label "timsort" . prop_permutation (T.sort :: Algo Int ()) 146 | qc $ label "radix I8" . prop_permutation (R.sort :: Algo Int8 ()) 147 | qc $ label "radix I16" . prop_permutation (R.sort :: Algo Int16 ()) 148 | qc $ label "radix I32" . prop_permutation (R.sort :: Algo Int32 ()) 149 | qc $ label "radix I64" . prop_permutation (R.sort :: Algo Int64 ()) 150 | qc $ label "radix Int" . prop_permutation (R.sort :: Algo Int ()) 151 | qc $ label "radix W8" . prop_permutation (R.sort :: Algo Word8 ()) 152 | qc $ label "radix W16" . prop_permutation (R.sort :: Algo Word16 ()) 153 | qc $ label "radix W32" . prop_permutation (R.sort :: Algo Word32 ()) 154 | qc $ label "radix W64" . prop_permutation (R.sort :: Algo Word64 ()) 155 | qc $ label "radix Word" . prop_permutation (R.sort :: Algo Word ()) 156 | qc $ label "flag I8" . prop_permutation (AF.sort :: Algo Int8 ()) 157 | qc $ label "flag I16" . prop_permutation (AF.sort :: Algo Int16 ()) 158 | qc $ label "flag I32" . prop_permutation (AF.sort :: Algo Int32 ()) 159 | qc $ label "flag I64" . prop_permutation (AF.sort :: Algo Int64 ()) 160 | qc $ label "flag Int" . prop_permutation (AF.sort :: Algo Int ()) 161 | qc $ label "flag W8" . prop_permutation (AF.sort :: Algo Word8 ()) 162 | qc $ label "flag W16" . prop_permutation (AF.sort :: Algo Word16 ()) 163 | qc $ label "flag W32" . prop_permutation (AF.sort :: Algo Word32 ()) 164 | qc $ label "flag W64" . prop_permutation (AF.sort :: Algo Word64 ()) 165 | qc $ label "flag Word" . prop_permutation (AF.sort :: Algo Word ()) 166 | qc $ label "flag ByteString" . prop_permutation (AF.sort :: Algo B.ByteString ()) 167 | qc $ label "intropartial" . prop_sized (\x -> const (prop_permutation x)) 168 | (INT.partialSort :: SizeAlgo Int ()) 169 | qc $ label "introselect" . prop_sized (\x -> const (prop_permutation x)) 170 | (INT.select :: SizeAlgo Int ()) 171 | qc $ label "heappartial" . prop_sized (\x -> const (prop_permutation x)) 172 | (H.partialSort :: SizeAlgo Int ()) 173 | qc $ label "heapselect" . prop_sized (\x -> const (prop_permutation x)) 174 | (H.select :: SizeAlgo Int ()) 175 | 176 | where 177 | qc prop = quickCheckWith args prop 178 | 179 | check_corners = do 180 | qc "introsort empty" $ prop_empty (INT.sort :: Algo Int ()) 181 | qc "intropartial empty" $ prop_sized_empty (INT.partialSort :: SizeAlgo Int ()) 182 | qc "introselect empty" $ prop_sized_empty (INT.select :: SizeAlgo Int ()) 183 | qc "heapsort empty" $ prop_empty (H.sort :: Algo Int ()) 184 | qc "heappartial empty" $ prop_sized_empty (H.partialSort :: SizeAlgo Int ()) 185 | qc "heapselect empty" $ prop_sized_empty (H.select :: SizeAlgo Int ()) 186 | qc "mergesort empty" $ prop_empty (M.sort :: Algo Int ()) 187 | qc "timsort empty" $ prop_empty (T.sort :: Algo Int ()) 188 | qc "radixsort empty" $ prop_empty (R.sort :: Algo Int ()) 189 | qc "flagsort empty" $ prop_empty (AF.sort :: Algo Int ()) 190 | where 191 | qc s prop = quickCheckWith (stdArgs { maxSuccess = 2 }) (label s prop) 192 | 193 | type SAlgo e r = forall s mv. MVector mv e => mv s e -> e -> ST s r 194 | type BoundSAlgo e r = forall s mv. MVector mv e => mv s e -> e -> Int -> Int -> ST s r 195 | 196 | check_search_range = do 197 | qc $ (label "binarySearchL" .) 198 | . prop_search_inrange (SR.binarySearchLByBounds compare :: BoundSAlgo Int Int) 199 | qc $ (label "binarySearchL lo-bound" .) 200 | . prop_search_lowbound (SR.binarySearchL :: SAlgo Int Int) 201 | qc $ (label "binarySearch" .) 202 | . prop_search_inrange (SR.binarySearchByBounds compare :: BoundSAlgo Int Int) 203 | qc $ (label "binarySearchR" .) 204 | . prop_search_inrange (SR.binarySearchRByBounds compare :: BoundSAlgo Int Int) 205 | qc $ (label "binarySearchR hi-bound" .) 206 | . prop_search_upbound (SR.binarySearchR :: SAlgo Int Int) 207 | where 208 | qc prop = quickCheckWith args prop 209 | 210 | main = do putStrLn "Int tests:" 211 | check_Int_sort 212 | check_Int_sortUniq 213 | check_Int_partialsort 214 | check_Int_select 215 | putStrLn "Radix sort tests:" 216 | check_radix_sorts 217 | -- putStrLn "Schwartzian transform (Int -> Word):" 218 | -- check_schwartzian 219 | putStrLn "Stability:" 220 | check_stable 221 | putStrLn "Optimals:" 222 | check_optimal 223 | putStrLn "Permutation:" 224 | check_permutation 225 | putStrLn "Search in range:" 226 | check_search_range 227 | putStrLn "Corner cases:" 228 | check_corners 229 | putStrLn "Algorithms:" 230 | check_nub 231 | -------------------------------------------------------------------------------- /tests/properties/Util.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE TypeOperators #-} 2 | 3 | module Util where 4 | 5 | import Control.Monad 6 | import Control.Monad.ST 7 | 8 | import Data.Word 9 | import Data.Int 10 | 11 | import qualified Data.ByteString as B 12 | 13 | import qualified Data.Vector as V 14 | 15 | import Data.Vector.Mutable hiding (length) 16 | 17 | import Test.QuickCheck 18 | 19 | 20 | mfromList :: [e] -> ST s (MVector s e) 21 | mfromList l = do v <- new (length l) 22 | fill l 0 v 23 | where 24 | fill [] _ v = return v 25 | fill (x:xs) i v = do write v i x 26 | fill xs (i+1) v 27 | 28 | instance (Arbitrary e) => Arbitrary (V.Vector e) where 29 | arbitrary = fmap V.fromList arbitrary 30 | 31 | instance Arbitrary B.ByteString where 32 | arbitrary = B.pack `fmap` arbitrary 33 | 34 | -------------------------------------------------------------------------------- /vector-algorithms.cabal: -------------------------------------------------------------------------------- 1 | cabal-version: >= 1.10 2 | name: vector-algorithms 3 | version: 0.9.1.0 4 | license: BSD3 5 | license-file: LICENSE 6 | author: Dan Doel 7 | maintainer: Dan Doel 8 | Erik de Castro Lopo 9 | copyright: (c) 2008,2009,2010,2011,2012,2013,2014,2015 Dan Doel 10 | (c) 2015 Tim Baumann 11 | homepage: https://github.com/erikd/vector-algorithms/ 12 | category: Data 13 | synopsis: Efficient algorithms for vector arrays 14 | description: Efficient algorithms for sorting vector arrays. At some stage 15 | other vector algorithms may be added. 16 | build-type: Simple 17 | 18 | extra-source-files: CHANGELOG.md 19 | 20 | tested-with: 21 | GHC == 9.12.1 22 | GHC == 9.10.1 23 | GHC == 9.8.2 24 | GHC == 9.6.3 25 | GHC == 9.4.7 26 | GHC == 9.2.8 27 | GHC == 9.0.2 28 | GHC == 8.10.7 29 | GHC == 8.8.4 30 | GHC == 8.6.5 31 | GHC == 8.4.4 32 | GHC == 8.2.2 33 | 34 | flag BoundsChecks 35 | description: Enable bounds checking 36 | default: True 37 | 38 | flag UnsafeChecks 39 | description: Enable bounds checking in unsafe operations at the cost of a 40 | significant performance penalty. 41 | default: False 42 | 43 | flag InternalChecks 44 | description: Enable internal consistency checks at the cost of a 45 | significant performance penalty. 46 | default: False 47 | 48 | flag bench 49 | description: Build a benchmarking program to test vector-algorithms 50 | performance 51 | default: True 52 | 53 | -- flag dump-simpl 54 | -- description: Dumps the simplified core during compilation 55 | -- default: False 56 | 57 | flag llvm 58 | description: Build using llvm 59 | default: False 60 | 61 | source-repository head 62 | type: git 63 | location: https://github.com/erikd/vector-algorithms/ 64 | 65 | library 66 | hs-source-dirs: src 67 | default-language: Haskell2010 68 | 69 | build-depends: base >= 4.8 && < 5, 70 | bitvec >= 1.0 && < 1.2, 71 | vector >= 0.6 && < 0.14, 72 | primitive >= 0.6.2.0 && < 0.10, 73 | bytestring >= 0.9 && < 1 74 | 75 | if ! impl (ghc >= 7.8) 76 | build-depends: tagged >= 0.4 && < 0.9 77 | 78 | exposed-modules: 79 | Data.Vector.Algorithms 80 | Data.Vector.Algorithms.Optimal 81 | Data.Vector.Algorithms.Insertion 82 | Data.Vector.Algorithms.Intro 83 | Data.Vector.Algorithms.Merge 84 | Data.Vector.Algorithms.Radix 85 | Data.Vector.Algorithms.Search 86 | Data.Vector.Algorithms.Heap 87 | Data.Vector.Algorithms.AmericanFlag 88 | Data.Vector.Algorithms.Tim 89 | 90 | other-modules: 91 | Data.Vector.Algorithms.Common 92 | 93 | ghc-options: 94 | -funbox-strict-fields 95 | 96 | -- Cabal/Hackage complains about these 97 | -- if flag(dump-simpl) 98 | -- ghc-options: -ddump-simpl -ddump-to-file 99 | 100 | if flag(llvm) 101 | ghc-options: -fllvm 102 | 103 | include-dirs: 104 | include 105 | 106 | install-includes: 107 | vector.h 108 | 109 | if flag(BoundsChecks) 110 | cpp-options: -DVECTOR_BOUNDS_CHECKS 111 | 112 | if flag(UnsafeChecks) 113 | cpp-options: -DVECTOR_UNSAFE_CHECKS 114 | 115 | if flag(InternalChecks) 116 | cpp-options: -DVECTOR_INTERNAL_CHECKS 117 | 118 | benchmark simple-bench 119 | hs-source-dirs: bench/simple 120 | type: exitcode-stdio-1.0 121 | default-language: Haskell2010 122 | 123 | if !flag(bench) 124 | buildable: False 125 | 126 | main-is: Main.hs 127 | 128 | other-modules: 129 | Blocks 130 | 131 | build-depends: base, mwc-random, vector, vector-algorithms 132 | ghc-options: -Wall 133 | 134 | -- Cabal/Hackage complains about these 135 | -- if flag(dump-simpl) 136 | -- ghc-options: -ddump-simpl -ddump-to-file 137 | 138 | if flag(llvm) 139 | ghc-options: -fllvm 140 | 141 | test-suite properties 142 | hs-source-dirs: tests/properties 143 | type: exitcode-stdio-1.0 144 | main-is: Tests.hs 145 | default-language: Haskell2010 146 | 147 | other-modules: 148 | Optimal 149 | Properties 150 | Util 151 | 152 | build-depends: 153 | base >= 4.9, 154 | bytestring, 155 | containers, 156 | QuickCheck > 2.9 && < 2.16, 157 | vector, 158 | vector-algorithms 159 | 160 | if flag(llvm) 161 | ghc-options: -fllvm 162 | --------------------------------------------------------------------------------