├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .hgignore ├── .hgtags ├── LICENSE ├── README.markdown ├── Statistics ├── Autocorrelation.hs ├── ConfidenceInt.hs ├── Correlation.hs ├── Correlation │ └── Kendall.hs ├── Distribution.hs ├── Distribution │ ├── Beta.hs │ ├── Binomial.hs │ ├── CauchyLorentz.hs │ ├── ChiSquared.hs │ ├── DiscreteUniform.hs │ ├── Exponential.hs │ ├── FDistribution.hs │ ├── Gamma.hs │ ├── Geometric.hs │ ├── Hypergeometric.hs │ ├── Laplace.hs │ ├── Lognormal.hs │ ├── NegativeBinomial.hs │ ├── Normal.hs │ ├── Poisson.hs │ ├── Poisson │ │ └── Internal.hs │ ├── StudentT.hs │ ├── Transform.hs │ ├── Uniform.hs │ └── Weibull.hs ├── Function.hs ├── Internal.hs ├── Quantile.hs ├── Regression.hs ├── Resampling.hs ├── Resampling │ └── Bootstrap.hs ├── Sample.hs ├── Sample │ ├── Histogram.hs │ ├── Internal.hs │ ├── KernelDensity.hs │ ├── KernelDensity │ │ └── Simple.hs │ ├── Normalize.hs │ └── Powers.hs ├── Test │ ├── ChiSquared.hs │ ├── Internal.hs │ ├── KolmogorovSmirnov.hs │ ├── KruskalWallis.hs │ ├── MannWhitneyU.hs │ ├── Runs.hs │ ├── StudentT.hs │ ├── Types.hs │ └── WilcoxonT.hs ├── Transform.hs ├── Types.hs └── Types │ └── Internal.hs ├── bench-papi └── Bench.hs ├── bench-time └── Bench.hs ├── benchmark └── bench.hs ├── cabal.project ├── changelog.md ├── dense-linear-algebra ├── LICENSE ├── README.md ├── dense-linear-algebra.cabal ├── src │ └── Statistics │ │ ├── Matrix.hs │ │ └── Matrix │ │ ├── Algorithms.hs │ │ ├── Function.hs │ │ ├── Mutable.hs │ │ └── Types.hs └── test │ ├── LibSpec.hs │ └── Spec.hs ├── examples └── kde │ ├── KDE.hs │ ├── data │ └── faithful.csv │ ├── kde.html │ └── kde.tpl ├── statistics.cabal └── tests ├── Tests ├── ApproxEq.hs ├── Correlation.hs ├── Distribution.hs ├── ExactDistribution.hs ├── Function.hs ├── Helpers.hs ├── KDE.hs ├── Matrix.hs ├── Matrix │ └── Types.hs ├── NonParametric.hs ├── NonParametric │ └── Table.hs ├── Orphanage.hs ├── Parametric.hs ├── Quantile.hs ├── Serialization.hs └── Transform.hs ├── 
doctest.hs ├── tests.hs └── utils ├── Makefile └── fftw.c /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | # Trigger the workflow on push or pull request, but only for the master branch 4 | on: 5 | pull_request: 6 | push: 7 | branches: [master] 8 | 9 | defaults: 10 | run: 11 | shell: bash 12 | 13 | jobs: 14 | cabal: 15 | name: ${{ matrix.os }} / ghc ${{ matrix.ghc }} 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | matrix: 19 | include: 20 | # Linux 21 | - { cabal: "3.12", os: ubuntu-latest, ghc: "8.4.4" } 22 | - { cabal: "3.12", os: ubuntu-latest, ghc: "8.6.5" } 23 | - { cabal: "3.12", os: ubuntu-latest, ghc: "8.8.4" } 24 | - { cabal: "3.12", os: ubuntu-latest, ghc: "8.10.7" } 25 | - { cabal: "3.12", os: ubuntu-latest, ghc: "9.0.2" } 26 | - { cabal: "3.12", os: ubuntu-latest, ghc: "9.2.8" } 27 | - { cabal: "3.12", os: ubuntu-latest, ghc: "9.4.8" } 28 | - { cabal: "3.12", os: ubuntu-latest, ghc: "9.6.6" } 29 | - { cabal: "3.12", os: ubuntu-latest, ghc: "9.8.4" } 30 | - { cabal: "3.12", os: ubuntu-latest, ghc: "9.10.1" } 31 | # Fails to resolve aeson [2025.01.14] 32 | # - { cabal: "3.12", os: ubuntu-latest, ghc: "9.12.1" } 33 | fail-fast: false 34 | 35 | steps: 36 | # ---------------- 37 | - uses: actions/checkout@v4 38 | # ---------------- 39 | - uses: haskell-actions/setup@v2 40 | id: setup-haskell-cabal 41 | name: Setup Haskell 42 | with: 43 | ghc-version: ${{ matrix.ghc }} 44 | cabal-version: ${{ matrix.cabal }} 45 | # ---------------- 46 | - uses: actions/cache@v3 47 | name: Cache ~/.cabal/store 48 | with: 49 | path: ${{ steps.setup-haskell-cabal.outputs.cabal-store }} 50 | # FIX: was `github.Shah`, which is not a valid `github` context property and
 # expands to the empty string — every commit reused one stale cache key.
 key: ${{ runner.os }}-${{ matrix.ghc }}--${{ github.sha }}-CACHE_V3 51 | # ---------------- 52 | - name: "Install PAPI" 53 | run: | 54 | sudo apt-get install -y libpapi-dev 55 | # ---------------- 56 | - name: Versions 57 | run: | 58 | cabal -V 59 | ghc -V 60 | # ---------------- 61 | - name: Make sdist 62 | 
run: | 63 | mkdir sdist 64 | cabal sdist -o sdist 65 | - name: Unpack 66 | run: | 67 | mkdir unpacked 68 | tar -C unpacked -xzf sdist/statistics*tar.gz 69 | # ---------------- 70 | - name: cabal check 71 | run: | 72 | cd unpacked/statistics-* 73 | cabal -vnormal check 74 | # ---------------- 75 | - name: Build 76 | run: | 77 | cd unpacked/statistics-* 78 | cabal configure --haddock-all --enable-tests --enable-benchmarks 79 | cabal build all --write-ghc-environment-files=always 80 | # ---------------- 81 | - name: Test 82 | run: | 83 | cd unpacked/statistics-* 84 | cabal test all 85 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | dist-newstyle 3 | .ghc.environment.* 4 | cabal.sandbox.config 5 | .stack-work 6 | TAGS 7 | -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | ^dist$ 2 | \.(?:aux|eventlog|h[ip]|log|[oa]|orig|prof|ps|swp)$ 3 | ~$ 4 | syntax: glob 5 | .\#* 6 | -------------------------------------------------------------------------------- /.hgtags: -------------------------------------------------------------------------------- 1 | b0800d494d66a079356c40fc0a0f6c7943cfce3f 0.1 2 | c827bab8b4821bb3251b71fa350f13420849f3c9 0.2.2 3 | d270e9663fba54f27698b704c05258038549901b 0.2.1 4 | 1f3d1ad64c4fa810550615a085a5df37675d9d33 0.2 5 | b71affe5caf6d20ec80c2800797053d56c39a283 0.3.6 6 | 9daa8a8be77e1c1d80e33f33870e02cfb972af45 0.3.3 7 | 5befb235d2697a79ebaea6e1c329a86ad1ad65b7 0.3.4 8 | 6d59726b207a8e4c348ce3a4cc5fa8bae78db0a3 0.3.2 9 | e36be8ad5175714058d9c31f3ed8fc940bcbb32c 0.3 10 | 7d9e5ad43b3f40c05cd82a82b6dc9507caee5a44 0.3.1 11 | b38c4bbadd4f7a4c8fca2e821019e7eee24a546c 0.3.5 12 | 007059a2c8874c822d6285f85532ec78c05b54ec 0.4 13 | d6696151a449d08dcdb5070c7682dde824512b7a 0.4.1 14 | 
ea8afae6deac29b15938122c61fa0c5ea1bee68c 0.5.1.0 15 | 96e1f328a83222884da8e44f6bba000378738e3a 0.5.1.1 16 | 9e58943cd825fe7e1a774bcaf0c26293ae7586a3 0.5.1.2 17 | 612bf90089edda4c337bfdae29c33cac45dd05ba 0.5.0.0 18 | 6658e21f4841fa9e0c05f63589ddb08c302cd9c1 0.6.0.0 19 | 77600bbdf0687644238faa1470c45c356f5cb4f2 0.6.0.1 20 | c28c06a9b6a82c4a33abdd86cffbd629b1631dc8 0.7.0.0 21 | b8ca72bbe71b5945e112ff45d59d4b45fdabad9c 0.8.0.0 22 | 83f39a545916a9da040f485d1d7f1ca1f5318ca4 0.8.0.5 23 | a60219fe5db2dd4820abef3b71926b29eea3ef86 0.8.0.1 24 | 6b6af0a56b20124868489664902ebd228b5afc29 0.8.0.2 25 | 9296e74ddd0ca476e0bcd1bd855476dafc6c825b 0.8.0.3 26 | b537df70181bbf4c267d658939df0009db73298a 0.8.0.4 27 | dc13e819406268870a9e442a5d329207d0012f5f 0.9.0.0 28 | 16eb25b56e88b8bb119f4b4b096cd951bb89f752 0.10.0.1 29 | e64eaba9a6eece4145ca7c5aba5c9ba11da99dfe 0.10.0.0 30 | eb901892b02af9efd5b8b46c77fe289240b6c69a 0.10.1.0 31 | e0171baa0d1f98d7b196a8bcbc9df889abc248bd 0.10.2.0 32 | beff76cd665adf846968b0248c6a1fc3daef005c 0.10.3.0 33 | 34ee28fbbc7fdd03739bae3094c6f4e511392dd6 0.10.3.1 34 | c1c80573883800f2c73ff5a31212d8c688a7d562 0.10.4.0 35 | d916be3f82405fa466005fa11c9bdc8c968cfff9 0.10.4.1 36 | 9711a2d79dfd882fdeb39479a4b2f4365dda448f 0.10.5.1 37 | 269f9457083ae21cdc785f623673b627361efe09 0.10.5.2 38 | d6fc5ae89aafbe335200f2c8d2ba2117322bd206 0.11.0.0 39 | 2f8fab6443bfb77c499852e6499df7ecd7f8c6ce 0.11.0.1 40 | f1452afc54f07559f5c85cb71db7a7e38d1456f1 0.11.0.2 41 | c75dd236003cf7eadab302d31f7523767407ea92 0.11.0.3 42 | 6de06c0e42a93bb602573fe15bab9fa32f82857a 0.12.0.0 43 | 6e37ee886a2d5dc8bf56e1d8e38b4cb08a845b90 0.13.1.0 44 | 2eed7bb5d817365a264ffc9b51f60e54d12cf328 0.13.1.1 45 | 74572dc8487579c11f69400b91863eb1a150e507 0.13.2.0 46 | e9452e9e69cedccbcbd13e407c38cca6286ac66f 0.13.2.2 47 | b4307c59867021ffc58f5afa62121f99b8d352ff 0.13.2.1 48 | 4302388f1acb398ec24f959175542be7addee36f 0.13.2.3 49 | ca30efd89890b8a6c871049111be08cab6eefc21 0.13.3.0 50 | 
5335fdac4baf7efefb8409743a5ba9d173ec8845 0.14.0.0 51 | 58427e7c3066951165dbefe0c4d976ce51689372 0.14.0.1 52 | d3e97bb448d7462b4e49532b7632a213cf874746 0.14.0.2 53 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009, 2010 Bryan O'Sullivan 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | # Statistics: efficient, general purpose statistics 2 | 3 | This package provides the Statistics module, a Haskell library for 4 | working with statistical data in a space- and time-efficient way. 5 | 6 | Where possible, we give citations and computational complexity 7 | estimates for the algorithms used. 8 | 9 | 10 | # Performance 11 | 12 | This library has been carefully optimised for high performance. To 13 | obtain the best runtime efficiency, it is imperative to compile 14 | libraries and applications that use this library using a high level of 15 | optimisation. 16 | 17 | 18 | # Get involved! 19 | 20 | Please report bugs via the 21 | [github issue tracker](https://github.com/haskell/statistics/issues). 22 | 23 | Master [git mirror](https://github.com/haskell/statistics): 24 | 25 | * `git clone git://github.com/haskell/statistics.git` 26 | 27 | # Authors 28 | 29 | This library is written and maintained by Bryan O'Sullivan, 30 | . 31 | -------------------------------------------------------------------------------- /Statistics/Autocorrelation.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | -- | 3 | -- Module : Statistics.Autocorrelation 4 | -- Copyright : (c) 2009 Bryan O'Sullivan 5 | -- License : BSD3 6 | -- 7 | -- Maintainer : bos@serpentine.com 8 | -- Stability : experimental 9 | -- Portability : portable 10 | -- 11 | -- Functions for computing autocovariance and autocorrelation of a 12 | -- sample. 
13 | 14 | module Statistics.Autocorrelation 15 | ( 16 | autocovariance 17 | , autocorrelation 18 | ) where 19 | 20 | import Prelude hiding (sum) 21 | import Statistics.Function (square) 22 | import Statistics.Sample (mean) 23 | import Statistics.Sample.Internal (sum) 24 | import qualified Data.Vector.Generic as G 25 | 26 | -- | Compute the autocovariance of a sample, i.e. the covariance of 27 | -- the sample against a shifted version of itself. 28 | autocovariance :: (G.Vector v Double, G.Vector v Int) => v Double -> v Double 29 | autocovariance a = G.map f . G.enumFromTo 0 $ l-2 30 | where 31 | f k = sum (G.zipWith (*) (G.take (l-k) c) (G.slice k (l-k) c)) 32 | / fromIntegral l 33 | c = G.map (subtract (mean a)) a 34 | l = G.length a 35 | 36 | -- | Compute the autocorrelation function of a sample, and the upper 37 | -- and lower bounds of confidence intervals for each element. 38 | -- 39 | -- /Note/: The calculation of the 95% confidence interval assumes a 40 | -- stationary Gaussian process. 41 | autocorrelation :: (G.Vector v Double, G.Vector v Int) => v Double -> (v Double, v Double, v Double) 42 | autocorrelation a = (r, ci (-), ci (+)) 43 | where 44 | r = G.map (/ G.head c) c 45 | where c = autocovariance a 46 | dllse = G.map f . G.scanl1 (+) . G.map square $ r 47 | where f v = 1.96 * sqrt ((v * 2 + 1) / l) 48 | l = fromIntegral (G.length a) 49 | ci f = G.cons 1 . G.tail . 
G.map (f (-1/l)) $ dllse 50 | -------------------------------------------------------------------------------- /Statistics/ConfidenceInt.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ViewPatterns #-} 2 | -- | Calculation of confidence intervals 3 | module Statistics.ConfidenceInt ( 4 | poissonCI 5 | , poissonNormalCI 6 | , binomialCI 7 | , naiveBinomialCI 8 | -- * References 9 | -- $references 10 | ) where 11 | 12 | import Statistics.Distribution 13 | import Statistics.Distribution.ChiSquared 14 | import Statistics.Distribution.Beta 15 | import Statistics.Types 16 | 17 | 18 | 19 | -- | Calculate confidence intervals for Poisson-distributed value 20 | -- using normal approximation 21 | poissonNormalCI :: Int -> Estimate NormalErr Double 22 | poissonNormalCI n 23 | | n < 0 = error "Statistics.ConfidenceInt.poissonNormalCI negative number of trials" 24 | | otherwise = estimateNormErr n' (sqrt n') 25 | where 26 | n' = fromIntegral n 27 | 28 | -- | Calculate confidence intervals for Poisson-distributed value for 29 | -- single measurement. These are exact confidence intervals 30 | poissonCI :: CL Double -> Int -> Estimate ConfInt Double 31 | poissonCI cl@(significanceLevel -> p) n 32 | | n < 0 = error "Statistics.ConfidenceInt.poissonCI: negative number of trials" 33 | | n == 0 = estimateFromInterval m (0 ,m2) cl 34 | | otherwise = estimateFromInterval m (m1,m2) cl 35 | where 36 | m = fromIntegral n 37 | m1 = 0.5 * quantile (chiSquared (2*n )) (p/2) 38 | m2 = 0.5 * complQuantile (chiSquared (2*n+2)) (p/2) 39 | 40 | -- | Calculate confidence interval using normal approximation. Note 41 | -- that this approximation breaks down when /p/ is either close to 0 42 | -- or to 1. In particular if @np < 5@ or @1 - np < 5@ this 43 | -- approximation shouldn't be used. 
44 | naiveBinomialCI :: Int -- ^ Number of trials 45 | -> Int -- ^ Number of successes 46 | -> Estimate NormalErr Double 47 | naiveBinomialCI n k 48 | | n <= 0 || k < 0 = error "Statistics.ConfidenceInt.naiveBinomialCI: negative number of events" 49 | | k > n = error "Statistics.ConfidenceInt.naiveBinomialCI: more successes than trials" 50 | | otherwise = estimateNormErr p σ 51 | where 52 | p = fromIntegral k / fromIntegral n 53 | σ = sqrt $ p * (1 - p) / fromIntegral n 54 | 55 | 56 | -- | Clopper-Pearson confidence interval also known as exact 57 | -- confidence intervals. 58 | binomialCI :: CL Double 59 | -> Int -- ^ Number of trials 60 | -> Int -- ^ Number of successes 61 | -> Estimate ConfInt Double 62 | binomialCI cl@(significanceLevel -> p) ni ki 63 | | ni <= 0 || ki < 0 = error "Statistics.ConfidenceInt.binomialCI: negative number of events" 64 | | ki > ni = error "Statistics.ConfidenceInt.binomialCI: more successes than trials" 65 | | ki == 0 = estimateFromInterval eff (0, ub) cl 66 | | ni == ki = estimateFromInterval eff (lb,1 ) cl 67 | | otherwise = estimateFromInterval eff (lb,ub) cl 68 | where 69 | k = fromIntegral ki 70 | n = fromIntegral ni 71 | eff = k / n 72 | lb = quantile (betaDistr k (n - k + 1)) (p/2) 73 | ub = complQuantile (betaDistr (k + 1) (n - k) ) (p/2) 74 | 75 | 76 | -- $references 77 | -- 78 | -- * Clopper, C.; Pearson, E. S. (1934). "The use of confidence or 79 | -- fiducial limits illustrated in the case of the 80 | -- binomial". Biometrika 26: 404–413. doi:10.1093/biomet/26.4.404 81 | -- 82 | -- * Brown, Lawrence D.; Cai, T. Tony; DasGupta, Anirban 83 | -- (2001). "Interval Estimation for a Binomial Proportion". Statistical 84 | -- Science 16 (2): 101–133. doi:10.1214/ss/1009213286. MR 1861069. 85 | -- Zbl 02068924. 
86 | -------------------------------------------------------------------------------- /Statistics/Correlation.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | {-# LANGUAGE BangPatterns #-} 3 | -- | 4 | -- Module : Statistics.Correlation.Pearson 5 | -- 6 | module Statistics.Correlation 7 | ( -- * Pearson correlation 8 | pearson 9 | , pearson2 10 | , pearsonMatByRow 11 | -- * Spearman correlation 12 | , spearman 13 | , spearman2 14 | , spearmanMatByRow 15 | ) where 16 | 17 | import qualified Data.Vector.Generic as G 18 | import qualified Data.Vector.Unboxed as U 19 | import Statistics.Matrix 20 | import Statistics.Sample 21 | import Statistics.Test.Internal (rankUnsorted) 22 | 23 | 24 | ---------------------------------------------------------------- 25 | -- Pearson 26 | ---------------------------------------------------------------- 27 | 28 | -- | Pearson correlation for sample of pairs. Exactly same as 29 | -- 'Statistics.Sample.correlation' 30 | pearson :: (G.Vector v (Double, Double)) 31 | => v (Double, Double) -> Double 32 | pearson = correlation 33 | {-# INLINE pearson #-} 34 | 35 | -- | Pearson correlation for sample of pairs. 
Exactly same as 36 | -- 'Statistics.Sample.correlation' 37 | pearson2 :: (G.Vector v Double) 38 | => v Double -> v Double -> Double 39 | pearson2 = correlation2 40 | {-# INLINE pearson2 #-} 41 | 42 | -- | Compute pairwise Pearson correlation between rows of a matrix 43 | pearsonMatByRow :: Matrix -> Matrix 44 | pearsonMatByRow m 45 | = generateSym (rows m) 46 | (\i j -> pearson $ row m i `U.zip` row m j) 47 | {-# INLINE pearsonMatByRow #-} 48 | 49 | 50 | 51 | ---------------------------------------------------------------- 52 | -- Spearman 53 | ---------------------------------------------------------------- 54 | 55 | -- | Compute Spearman correlation between two samples 56 | spearman :: ( Ord a 57 | , Ord b 58 | , G.Vector v a 59 | , G.Vector v b 60 | , G.Vector v (a, b) 61 | , G.Vector v Int 62 | , G.Vector v (Int, a) 63 | , G.Vector v (Int, b) 64 | ) 65 | => v (a, b) 66 | -> Double 67 | spearman xy 68 | = pearson 69 | $ G.zip (rankUnsorted x) (rankUnsorted y) 70 | where 71 | (x, y) = G.unzip xy 72 | {-# INLINE spearman #-} 73 | 74 | -- | Compute Spearman correlation between two samples. Samples must 75 | -- have same length. 76 | spearman2 :: ( Ord a 77 | , Ord b 78 | , G.Vector v a 79 | , G.Vector v b 80 | , G.Vector v Int 81 | , G.Vector v (Int, a) 82 | , G.Vector v (Int, b) 83 | ) 84 | => v a 85 | -> v b 86 | -> Double 87 | spearman2 xs ys 88 | | nx /= ny = error "Statistics.Correlation.spearman2: samples must have same length" 89 | | otherwise = pearson $ G.zip (rankUnsorted xs) (rankUnsorted ys) 90 | where 91 | nx = G.length xs 92 | ny = G.length ys 93 | {-# INLINE spearman2 #-} 94 | 95 | -- | compute pairwise Spearman correlation between rows of a matrix 96 | spearmanMatByRow :: Matrix -> Matrix 97 | spearmanMatByRow 98 | = pearsonMatByRow . fromRows . fmap rankUnsorted . 
toRows 99 | {-# INLINE spearmanMatByRow #-} 100 | -------------------------------------------------------------------------------- /Statistics/Correlation/Kendall.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, FlexibleContexts #-} 2 | -- | 3 | -- Module : Statistics.Correlation.Kendall 4 | -- 5 | -- Fast O(NlogN) implementation of 6 | -- <https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient Kendall's tau>. 7 | -- 8 | -- This module implements Kendall's tau form b which allows ties in the data. 9 | -- This is the same formula used by other statistical packages, e.g., R, matlab. 10 | -- 11 | -- > \tau = \frac{n_c - n_d}{\sqrt{(n_0 - n_1)(n_0 - n_2)}} 12 | -- 13 | -- where n_0 = n(n-1)\/2, n_1 = number of pairs tied for the first quantity, 14 | -- n_2 = number of pairs tied for the second quantity, 15 | -- n_c = number of concordant pairs, n_d = number of discordant pairs. 16 | 17 | module Statistics.Correlation.Kendall 18 | ( kendall 19 | 20 | -- * References 21 | -- $references 22 | ) where 23 | 24 | import Control.Monad.ST (ST, runST) 25 | import Data.Bits (shiftR) 26 | import Data.Function (on) 27 | import Data.STRef 28 | import qualified Data.Vector.Algorithms.Intro as I 29 | import qualified Data.Vector.Generic as G 30 | import qualified Data.Vector.Generic.Mutable as GM 31 | 32 | -- | /O(nlogn)/ Compute the Kendall's tau from a vector of paired data. 33 | -- Return NaN when number of pairs <= 1. 
34 | kendall :: (Ord a, Ord b, G.Vector v (a, b)) => v (a, b) -> Double 35 | kendall xy' 36 | | G.length xy' <= 1 = 0/0 37 | | otherwise = runST $ do 38 | xy <- G.thaw xy' 39 | let n = GM.length xy 40 | n_dRef <- newSTRef 0 41 | I.sort xy 42 | tieX <- numOfTiesBy ((==) `on` fst) xy 43 | tieXY <- numOfTiesBy (==) xy 44 | tmp <- GM.new n 45 | mergeSort (compare `on` snd) xy tmp n_dRef 46 | tieY <- numOfTiesBy ((==) `on` snd) xy 47 | n_d <- readSTRef n_dRef 48 | let n_0 = (fromIntegral n * (fromIntegral n-1)) `shiftR` 1 :: Integer 49 | n_c = n_0 - n_d - tieX - tieY + tieXY 50 | return $ fromIntegral (n_c - n_d) / 51 | (sqrt.fromIntegral) ((n_0 - tieX) * (n_0 - tieY)) 52 | {-# INLINE kendall #-} 53 | 54 | -- calculate number of tied pairs in a sorted vector 55 | numOfTiesBy :: GM.MVector v a 56 | => (a -> a -> Bool) -> v s a -> ST s Integer 57 | numOfTiesBy f xs = do count <- newSTRef (0::Integer) 58 | loop count (1::Int) (0::Int) 59 | readSTRef count 60 | where 61 | n = GM.length xs 62 | loop c !acc !i | i >= n - 1 = modifySTRef' c (+ g acc) 63 | | otherwise = do 64 | x1 <- GM.unsafeRead xs i 65 | x2 <- GM.unsafeRead xs (i+1) 66 | if f x1 x2 67 | then loop c (acc+1) (i+1) 68 | else modifySTRef' c (+ g acc) >> loop c 1 (i+1) 69 | g x = fromIntegral ((x * (x - 1)) `shiftR` 1) 70 | {-# INLINE numOfTiesBy #-} 71 | 72 | -- Implementation of Knight's merge sort (adapted from vector-algorithm). This 73 | -- function is used to count the number of discordant pairs. 
74 | mergeSort :: GM.MVector v e 75 | => (e -> e -> Ordering) 76 | -> v s e 77 | -> v s e 78 | -> STRef s Integer 79 | -> ST s () 80 | mergeSort cmp src buf count = loop 0 (GM.length src - 1) 81 | where 82 | loop l u 83 | | u == l = return () 84 | | u - l == 1 = do 85 | eL <- GM.unsafeRead src l 86 | eU <- GM.unsafeRead src u 87 | case cmp eL eU of 88 | GT -> do GM.unsafeWrite src l eU 89 | GM.unsafeWrite src u eL 90 | modifySTRef' count (+1) 91 | _ -> return () 92 | | otherwise = do 93 | let mid = (u + l) `shiftR` 1 94 | loop l mid 95 | loop mid u 96 | merge cmp (GM.unsafeSlice l (u-l+1) src) buf (mid - l) count 97 | {-# INLINE mergeSort #-} 98 | 99 | merge :: GM.MVector v e 100 | => (e -> e -> Ordering) 101 | -> v s e 102 | -> v s e 103 | -> Int 104 | -> STRef s Integer 105 | -> ST s () 106 | merge cmp src buf mid count = do GM.unsafeCopy tmp lower 107 | eTmp <- GM.unsafeRead tmp 0 108 | eUpp <- GM.unsafeRead upper 0 109 | loop tmp 0 eTmp upper 0 eUpp 0 110 | where 111 | lower = GM.unsafeSlice 0 mid src 112 | upper = GM.unsafeSlice mid (GM.length src - mid) src 113 | tmp = GM.unsafeSlice 0 mid buf 114 | wroteHigh low iLow eLow high iHigh iIns 115 | | iHigh >= GM.length high = 116 | GM.unsafeCopy (GM.unsafeSlice iIns (GM.length low - iLow) src) 117 | (GM.unsafeSlice iLow (GM.length low - iLow) low) 118 | | otherwise = do eHigh <- GM.unsafeRead high iHigh 119 | loop low iLow eLow high iHigh eHigh iIns 120 | 121 | wroteLow low iLow high iHigh eHigh iIns 122 | | iLow >= GM.length low = return () 123 | | otherwise = do eLow <- GM.unsafeRead low iLow 124 | loop low iLow eLow high iHigh eHigh iIns 125 | 126 | loop !low !iLow !eLow !high !iHigh !eHigh !iIns = case cmp eHigh eLow of 127 | LT -> do GM.unsafeWrite src iIns eHigh 128 | modifySTRef' count (+ fromIntegral (GM.length low - iLow)) 129 | wroteHigh low iLow eLow high (iHigh+1) (iIns+1) 130 | _ -> do GM.unsafeWrite src iIns eLow 131 | wroteLow low (iLow+1) high iHigh eHigh (iIns+1) 132 | {-# INLINE merge #-} 133 | 
134 | -- $references 135 | -- 136 | -- * William R. Knight. (1966) A computer method for calculating Kendall's Tau 137 | -- with ungrouped data. /Journal of the American Statistical Association/, 138 | -- Vol. 61, No. 314, Part 1, pp. 436-439. 139 | -- 140 | -------------------------------------------------------------------------------- /Statistics/Distribution/Beta.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Statistics.Distribution.Beta 6 | -- Copyright : (C) 2012 Edward Kmett, 7 | -- License : BSD-style (see the file LICENSE) 8 | -- 9 | -- Maintainer : Edward Kmett 10 | -- Stability : provisional 11 | -- Portability : DeriveDataTypeable 12 | -- 13 | ---------------------------------------------------------------------------- 14 | module Statistics.Distribution.Beta 15 | ( BetaDistribution 16 | -- * Constructor 17 | , betaDistr 18 | , betaDistrE 19 | , improperBetaDistr 20 | , improperBetaDistrE 21 | -- * Accessors 22 | , bdAlpha 23 | , bdBeta 24 | ) where 25 | 26 | import Control.Applicative 27 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 28 | import Data.Binary (Binary(..)) 29 | import Data.Data (Data, Typeable) 30 | import GHC.Generics (Generic) 31 | import Numeric.SpecFunctions ( 32 | incompleteBeta, invIncompleteBeta, logBeta, digamma, log1p) 33 | import Numeric.MathFunctions.Constants (m_NaN,m_neg_inf) 34 | import qualified Statistics.Distribution as D 35 | import Statistics.Internal 36 | 37 | 38 | -- | The beta distribution 39 | data BetaDistribution = BD 40 | { bdAlpha :: {-# UNPACK #-} !Double 41 | -- ^ Alpha shape parameter 42 | , bdBeta :: {-# UNPACK #-} !Double 43 | -- ^ Beta shape parameter 44 | } deriving (Eq, Typeable, Data, Generic) 45 | 46 | instance Show BetaDistribution where 47 | showsPrec n 
(BD a b) = defaultShow2 "improperBetaDistr" a b n 48 | instance Read BetaDistribution where 49 | readPrec = defaultReadPrecM2 "improperBetaDistr" improperBetaDistrE 50 | 51 | instance ToJSON BetaDistribution 52 | instance FromJSON BetaDistribution where 53 | parseJSON (Object v) = do 54 | a <- v .: "bdAlpha" 55 | b <- v .: "bdBeta" 56 | maybe (fail $ errMsgI a b) return $ improperBetaDistrE a b 57 | parseJSON _ = empty 58 | 59 | instance Binary BetaDistribution where 60 | put (BD a b) = put a >> put b 61 | get = do 62 | a <- get 63 | b <- get 64 | maybe (fail $ errMsgI a b) return $ improperBetaDistrE a b 65 | 66 | 67 | -- | Create beta distribution. Both shape parameters must be positive. 68 | betaDistr :: Double -- ^ Shape parameter alpha 69 | -> Double -- ^ Shape parameter beta 70 | -> BetaDistribution 71 | betaDistr a b = maybe (error $ errMsg a b) id $ betaDistrE a b 72 | 73 | -- | Create beta distribution. Both shape parameters must be positive. 74 | betaDistrE :: Double -- ^ Shape parameter alpha 75 | -> Double -- ^ Shape parameter beta 76 | -> Maybe BetaDistribution 77 | betaDistrE a b 78 | | a > 0 && b > 0 = Just (BD a b) 79 | | otherwise = Nothing 80 | 81 | errMsg :: Double -> Double -> String 82 | errMsg a b = "Statistics.Distribution.Beta.betaDistr: " 83 | ++ "shape parameters must be positive. Got a = " 84 | ++ show a 85 | ++ " b = " 86 | ++ show b 87 | 88 | 89 | -- | Create beta distribution. Both shape parameters must be 90 | -- non-negative. So it allows to construct improper beta distribution 91 | -- which could be used as improper prior. 92 | improperBetaDistr :: Double -- ^ Shape parameter alpha 93 | -> Double -- ^ Shape parameter beta 94 | -> BetaDistribution 95 | improperBetaDistr a b 96 | = maybe (error $ errMsgI a b) id $ improperBetaDistrE a b 97 | 98 | -- | Create beta distribution. Both shape parameters must be 99 | -- non-negative. So it allows to construct improper beta distribution 100 | -- which could be used as improper prior. 
101 | improperBetaDistrE :: Double -- ^ Shape parameter alpha 102 | -> Double -- ^ Shape parameter beta 103 | -> Maybe BetaDistribution 104 | improperBetaDistrE a b 105 | | a >= 0 && b >= 0 = Just (BD a b) 106 | | otherwise = Nothing 107 | 108 | errMsgI :: Double -> Double -> String 109 | errMsgI a b 110 | = "Statistics.Distribution.Beta.betaDistr: " 111 | ++ "shape parameters must be non-negative. Got a = " ++ show a 112 | ++ " b = " ++ show b 113 | 114 | 115 | 116 | instance D.Distribution BetaDistribution where 117 | cumulative (BD a b) x 118 | | x <= 0 = 0 119 | | x >= 1 = 1 120 | | otherwise = incompleteBeta a b x 121 | complCumulative (BD a b) x 122 | | x <= 0 = 1 123 | | x >= 1 = 0 124 | -- For small x we use direct computation to avoid precision loss 125 | -- when computing (1-x) 126 | | x < 0.5 = 1 - incompleteBeta a b x 127 | -- Otherwise we use property of incomplete beta: 128 | -- > I(x,a,b) = 1 - I(1-x,b,a) 129 | | otherwise = incompleteBeta b a (1-x) 130 | 131 | instance D.Mean BetaDistribution where 132 | mean (BD a b) = a / (a + b) 133 | 134 | instance D.MaybeMean BetaDistribution where 135 | maybeMean = Just . D.mean 136 | 137 | instance D.Variance BetaDistribution where 138 | variance (BD a b) = a*b / (apb*apb*(apb+1)) 139 | where apb = a + b 140 | 141 | instance D.MaybeVariance BetaDistribution where 142 | maybeVariance = Just . D.variance 143 | 144 | instance D.Entropy BetaDistribution where 145 | entropy (BD a b) = 146 | logBeta a b 147 | - (a-1) * digamma a 148 | - (b-1) * digamma b 149 | + (a+b-2) * digamma (a+b) 150 | 151 | instance D.MaybeEntropy BetaDistribution where 152 | maybeEntropy = Just . 
D.entropy 153 | 154 | instance D.ContDistr BetaDistribution where 155 | density (BD a b) x 156 | | a <= 0 || b <= 0 = m_NaN 157 | | x <= 0 = 0 158 | | x >= 1 = 0 159 | | otherwise = exp $ (a-1)*log x + (b-1) * log1p (-x) - logBeta a b 160 | logDensity (BD a b) x 161 | | a <= 0 || b <= 0 = m_NaN 162 | | x <= 0 = m_neg_inf 163 | | x >= 1 = m_neg_inf 164 | | otherwise = (a-1)*log x + (b-1)*log1p (-x) - logBeta a b 165 | 166 | quantile (BD a b) p 167 | | p == 0 = 0 168 | | p == 1 = 1 169 | | p > 0 && p < 1 = invIncompleteBeta a b p 170 | | otherwise = 171 | error $ "Statistics.Distribution.Beta.quantile: p must be in [0,1] range. Got: "++show p 172 | 173 | instance D.ContGen BetaDistribution where 174 | genContVar = D.genContinuous 175 | -------------------------------------------------------------------------------- /Statistics/Distribution/Binomial.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE PatternGuards #-} 3 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 4 | -- | 5 | -- Module : Statistics.Distribution.Binomial 6 | -- Copyright : (c) 2009 Bryan O'Sullivan 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : bos@serpentine.com 10 | -- Stability : experimental 11 | -- Portability : portable 12 | -- 13 | -- The binomial distribution. This is the discrete probability 14 | -- distribution of the number of successes in a sequence of /n/ 15 | -- independent yes\/no experiments, each of which yields success with 16 | -- probability /p/. 
17 | 18 | module Statistics.Distribution.Binomial 19 | ( 20 | BinomialDistribution 21 | -- * Constructors 22 | , binomial 23 | , binomialE 24 | -- * Accessors 25 | , bdTrials 26 | , bdProbability 27 | ) where 28 | 29 | import Control.Applicative 30 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 31 | import Data.Binary (Binary(..)) 32 | import Data.Data (Data, Typeable) 33 | import GHC.Generics (Generic) 34 | import Numeric.SpecFunctions (choose,logChoose,incompleteBeta,log1p) 35 | import Numeric.MathFunctions.Constants (m_epsilon,m_tiny) 36 | 37 | import qualified Statistics.Distribution as D 38 | import qualified Statistics.Distribution.Poisson.Internal as I 39 | import Statistics.Internal 40 | 41 | 42 | -- | The binomial distribution. 43 | data BinomialDistribution = BD { 44 | bdTrials :: {-# UNPACK #-} !Int 45 | -- ^ Number of trials. 46 | , bdProbability :: {-# UNPACK #-} !Double 47 | -- ^ Probability. 48 | } deriving (Eq, Typeable, Data, Generic) 49 | 50 | instance Show BinomialDistribution where 51 | showsPrec i (BD n p) = defaultShow2 "binomial" n p i 52 | instance Read BinomialDistribution where 53 | readPrec = defaultReadPrecM2 "binomial" binomialE 54 | 55 | instance ToJSON BinomialDistribution 56 | instance FromJSON BinomialDistribution where 57 | parseJSON (Object v) = do 58 | n <- v .: "bdTrials" 59 | p <- v .: "bdProbability" 60 | maybe (fail $ errMsg n p) return $ binomialE n p 61 | parseJSON _ = empty 62 | 63 | instance Binary BinomialDistribution where 64 | put (BD x y) = put x >> put y 65 | get = do 66 | n <- get 67 | p <- get 68 | maybe (fail $ errMsg n p) return $ binomialE n p 69 | 70 | 71 | 72 | instance D.Distribution BinomialDistribution where 73 | cumulative = cumulative 74 | complCumulative = complCumulative 75 | 76 | instance D.DiscreteDistr BinomialDistribution where 77 | probability = probability 78 | logProbability = logProbability 79 | 80 | instance D.Mean BinomialDistribution where 81 | mean = mean 82 | 83 | instance 
D.Variance BinomialDistribution where 84 | variance = variance 85 | 86 | instance D.MaybeMean BinomialDistribution where 87 | maybeMean = Just . D.mean 88 | 89 | instance D.MaybeVariance BinomialDistribution where 90 | maybeStdDev = Just . D.stdDev 91 | maybeVariance = Just . D.variance 92 | 93 | instance D.Entropy BinomialDistribution where 94 | entropy (BD n p) 95 | | n == 0 = 0 96 | | n <= 100 = directEntropy (BD n p) 97 | | otherwise = I.poissonEntropy (fromIntegral n * p) 98 | 99 | instance D.MaybeEntropy BinomialDistribution where 100 | maybeEntropy = Just . D.entropy 101 | 102 | -- This could be slow for big n 103 | probability :: BinomialDistribution -> Int -> Double 104 | probability (BD n p) k 105 | | k < 0 || k > n = 0 106 | | n == 0 = 1 107 | -- choose could overflow Double for n >= 1030 so we switch to 108 | -- log-domain to calculate probability 109 | -- 110 | -- We also want to avoid underflow when computing p^k & 111 | -- (1-p)^(n-k). 112 | | n < 1000 113 | , pK >= m_tiny 114 | , pNK >= m_tiny = choose n k * pK * pNK 115 | | otherwise = exp $ logChoose n k + log p * k' + log1p (-p) * nk' 116 | where 117 | pK = p^k 118 | pNK = (1-p)^(n-k) 119 | k' = fromIntegral k 120 | nk' = fromIntegral $ n - k 121 | 122 | logProbability :: BinomialDistribution -> Int -> Double 123 | logProbability (BD n p) k 124 | | k < 0 || k > n = (-1)/0 125 | | n == 0 = 0 126 | | otherwise = logChoose n k + log p * k' + log1p (-p) * nk' 127 | where 128 | k' = fromIntegral k 129 | nk' = fromIntegral $ n - k 130 | 131 | cumulative :: BinomialDistribution -> Double -> Double 132 | cumulative (BD n p) x 133 | | isNaN x = error "Statistics.Distribution.Binomial.cumulative: NaN input" 134 | | isInfinite x = if x > 0 then 1 else 0 135 | | k < 0 = 0 136 | | k >= n = 1 137 | | otherwise = incompleteBeta (fromIntegral (n-k)) (fromIntegral (k+1)) (1 - p) 138 | where 139 | k = floor x 140 | 141 | complCumulative :: BinomialDistribution -> Double -> Double 142 | complCumulative (BD n p) x 
143 | | isNaN x = error "Statistics.Distribution.Binomial.complCumulative: NaN input" 144 | | isInfinite x = if x > 0 then 0 else 1 145 | | k < 0 = 1 146 | | k >= n = 0 147 | | otherwise = incompleteBeta (fromIntegral (k+1)) (fromIntegral (n-k)) p 148 | where 149 | k = floor x 150 | 151 | mean :: BinomialDistribution -> Double 152 | mean (BD n p) = fromIntegral n * p 153 | 154 | variance :: BinomialDistribution -> Double 155 | variance (BD n p) = fromIntegral n * p * (1 - p) 156 | 157 | directEntropy :: BinomialDistribution -> Double 158 | directEntropy d@(BD n _) = 159 | negate . sum $ 160 | takeWhile (< negate m_epsilon) $ 161 | dropWhile (not . (< negate m_epsilon)) $ 162 | [ let x = probability d k in x * log x | k <- [0..n]] 163 | 164 | -- | Construct binomial distribution. Number of trials must be 165 | -- non-negative and probability must be in [0,1] range 166 | binomial :: Int -- ^ Number of trials. 167 | -> Double -- ^ Probability. 168 | -> BinomialDistribution 169 | binomial n p = maybe (error $ errMsg n p) id $ binomialE n p 170 | 171 | -- | Construct binomial distribution. Number of trials must be 172 | -- non-negative and probability must be in [0,1] range 173 | binomialE :: Int -- ^ Number of trials. 174 | -> Double -- ^ Probability. 
175 | -> Maybe BinomialDistribution 176 | binomialE n p 177 | | n < 0 = Nothing 178 | | p >= 0 && p <= 1 = Just (BD n p) 179 | | otherwise = Nothing 180 | 181 | errMsg :: Int -> Double -> String 182 | errMsg n p 183 | = "Statistics.Distribution.Binomial.binomial: n=" ++ show n 184 | ++ " p=" ++ show p ++ "but n>=0 and p in [0,1]" 185 | -------------------------------------------------------------------------------- /Statistics/Distribution/CauchyLorentz.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | -- | 4 | -- Module : Statistics.Distribution.CauchyLorentz 5 | -- Copyright : (c) 2011 Aleksey Khudyakov 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- The Cauchy-Lorentz distribution. It's also known as Lorentz 13 | -- distribution or Breit–Wigner distribution. 14 | -- 15 | -- It doesn't have mean and variance. 16 | module Statistics.Distribution.CauchyLorentz ( 17 | CauchyDistribution 18 | , cauchyDistribMedian 19 | , cauchyDistribScale 20 | -- * Constructors 21 | , cauchyDistribution 22 | , cauchyDistributionE 23 | , standardCauchy 24 | ) where 25 | 26 | import Control.Applicative 27 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 28 | import Data.Binary (Binary(..)) 29 | import Data.Maybe (fromMaybe) 30 | import Data.Data (Data, Typeable) 31 | import GHC.Generics (Generic) 32 | import qualified Statistics.Distribution as D 33 | import Statistics.Internal 34 | 35 | -- | Cauchy-Lorentz distribution. 36 | data CauchyDistribution = CD { 37 | -- | Central value of Cauchy-Lorentz distribution which is its 38 | -- mode and median. Distribution doesn't have mean so function 39 | -- is named after median. 40 | cauchyDistribMedian :: {-# UNPACK #-} !Double 41 | -- | Scale parameter of Cauchy-Lorentz distribution. 
It's 42 | -- different from variance and specify half width at half 43 | -- maximum (HWHM). 44 | , cauchyDistribScale :: {-# UNPACK #-} !Double 45 | } 46 | deriving (Eq, Typeable, Data, Generic) 47 | 48 | instance Show CauchyDistribution where 49 | showsPrec i (CD m s) = defaultShow2 "cauchyDistribution" m s i 50 | instance Read CauchyDistribution where 51 | readPrec = defaultReadPrecM2 "cauchyDistribution" cauchyDistributionE 52 | 53 | instance ToJSON CauchyDistribution 54 | instance FromJSON CauchyDistribution where 55 | parseJSON (Object v) = do 56 | m <- v .: "cauchyDistribMedian" 57 | s <- v .: "cauchyDistribScale" 58 | maybe (fail $ errMsg m s) return $ cauchyDistributionE m s 59 | parseJSON _ = empty 60 | 61 | instance Binary CauchyDistribution where 62 | put (CD m s) = put m >> put s 63 | get = do 64 | m <- get 65 | s <- get 66 | maybe (error $ errMsg m s) return $ cauchyDistributionE m s 67 | 68 | 69 | -- | Cauchy distribution 70 | cauchyDistribution :: Double -- ^ Central point 71 | -> Double -- ^ Scale parameter (FWHM) 72 | -> CauchyDistribution 73 | cauchyDistribution m s 74 | = fromMaybe (error $ errMsg m s) 75 | $ cauchyDistributionE m s 76 | 77 | 78 | -- | Cauchy distribution 79 | cauchyDistributionE :: Double -- ^ Central point 80 | -> Double -- ^ Scale parameter (FWHM) 81 | -> Maybe CauchyDistribution 82 | cauchyDistributionE m s 83 | | s > 0 = Just (CD m s) 84 | | otherwise = Nothing 85 | 86 | errMsg :: Double -> Double -> String 87 | errMsg _ s 88 | = "Statistics.Distribution.CauchyLorentz.cauchyDistribution: FWHM must be positive. Got " 89 | ++ show s 90 | 91 | -- | Standard Cauchy distribution. 
It is centered at 0 and has scale parameter (HWHM) 1
D.entropy 143 | -------------------------------------------------------------------------------- /Statistics/Distribution/ChiSquared.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | -- | 4 | -- Module : Statistics.Distribution.ChiSquared 5 | -- Copyright : (c) 2010 Alexey Khudyakov 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- The chi-squared distribution. This is a continuous probability 13 | -- distribution of sum of squares of k independent standard normal 14 | -- distributions. It's commonly used in statistical tests 15 | module Statistics.Distribution.ChiSquared ( 16 | ChiSquared 17 | , chiSquaredNDF 18 | -- * Constructors 19 | , chiSquared 20 | , chiSquaredE 21 | ) where 22 | 23 | import Control.Applicative 24 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 25 | import Data.Binary (Binary(..)) 26 | import Data.Data (Data, Typeable) 27 | import GHC.Generics (Generic) 28 | import Numeric.SpecFunctions ( incompleteGamma,invIncompleteGamma,logGamma,digamma) 29 | import Numeric.MathFunctions.Constants (m_neg_inf) 30 | import qualified System.Random.MWC.Distributions as MWC 31 | 32 | import qualified Statistics.Distribution as D 33 | import Statistics.Internal 34 | 35 | 36 | 37 | -- | Chi-squared distribution 38 | newtype ChiSquared = ChiSquared 39 | { chiSquaredNDF :: Int 40 | -- ^ Get number of degrees of freedom 41 | } 42 | deriving (Eq, Typeable, Data, Generic) 43 | 44 | instance Show ChiSquared where 45 | showsPrec i (ChiSquared n) = defaultShow1 "chiSquared" n i 46 | instance Read ChiSquared where 47 | readPrec = defaultReadPrecM1 "chiSquared" chiSquaredE 48 | 49 | instance ToJSON ChiSquared 50 | instance FromJSON ChiSquared where 51 | parseJSON (Object v) = do 52 | n <- v .: "chiSquaredNDF" 53 | maybe (fail $ errMsg 
n) return $ chiSquaredE n 54 | parseJSON _ = empty 55 | 56 | instance Binary ChiSquared where 57 | put (ChiSquared x) = put x 58 | get = do n <- get 59 | maybe (fail $ errMsg n) return $ chiSquaredE n 60 | 61 | 62 | -- | Construct chi-squared distribution. Number of degrees of freedom 63 | -- must be positive. 64 | chiSquared :: Int -> ChiSquared 65 | chiSquared n = maybe (error $ errMsg n) id $ chiSquaredE n 66 | 67 | -- | Construct chi-squared distribution. Number of degrees of freedom 68 | -- must be positive. 69 | chiSquaredE :: Int -> Maybe ChiSquared 70 | chiSquaredE n 71 | | n <= 0 = Nothing 72 | | otherwise = Just (ChiSquared n) 73 | 74 | errMsg :: Int -> String 75 | errMsg n = "Statistics.Distribution.ChiSquared.chiSquared: N.D.F. must be positive. Got " ++ show n 76 | 77 | instance D.Distribution ChiSquared where 78 | cumulative = cumulative 79 | 80 | instance D.ContDistr ChiSquared where 81 | density chi x 82 | | x <= 0 = 0 83 | | otherwise = exp $ log x * (ndf2 - 1) - x2 - logGamma ndf2 - log 2 * ndf2 84 | where 85 | ndf = fromIntegral $ chiSquaredNDF chi 86 | ndf2 = ndf/2 87 | x2 = x/2 88 | 89 | logDensity chi x 90 | | x <= 0 = m_neg_inf 91 | | otherwise = log x * (ndf2 - 1) - x2 - logGamma ndf2 - log 2 * ndf2 92 | where 93 | ndf = fromIntegral $ chiSquaredNDF chi 94 | ndf2 = ndf/2 95 | x2 = x/2 96 | 97 | quantile = quantile 98 | 99 | instance D.Mean ChiSquared where 100 | mean (ChiSquared ndf) = fromIntegral ndf 101 | 102 | instance D.Variance ChiSquared where 103 | variance (ChiSquared ndf) = fromIntegral (2*ndf) 104 | 105 | instance D.MaybeMean ChiSquared where 106 | maybeMean = Just . D.mean 107 | 108 | instance D.MaybeVariance ChiSquared where 109 | maybeStdDev = Just . D.stdDev 110 | maybeVariance = Just . 
D.variance 111 | 112 | instance D.Entropy ChiSquared where 113 | entropy (ChiSquared ndf) = 114 | let kHalf = 0.5 * fromIntegral ndf in 115 | kHalf 116 | + log 2 117 | + logGamma kHalf 118 | + (1-kHalf) * digamma kHalf 119 | 120 | instance D.MaybeEntropy ChiSquared where 121 | maybeEntropy = Just . D.entropy 122 | 123 | instance D.ContGen ChiSquared where 124 | genContVar (ChiSquared n) = MWC.chiSquare n 125 | 126 | 127 | cumulative :: ChiSquared -> Double -> Double 128 | cumulative chi x 129 | | x <= 0 = 0 130 | | otherwise = incompleteGamma (ndf/2) (x/2) 131 | where 132 | ndf = fromIntegral $ chiSquaredNDF chi 133 | 134 | quantile :: ChiSquared -> Double -> Double 135 | quantile (ChiSquared ndf) p 136 | | p == 0 = 0 137 | | p == 1 = 1/0 138 | | p > 0 && p < 1 = 2 * invIncompleteGamma (fromIntegral ndf / 2) p 139 | | otherwise = 140 | error $ "Statistics.Distribution.ChiSquared.quantile: p must be in [0,1] range. Got: "++show p 141 | -------------------------------------------------------------------------------- /Statistics/Distribution/DiscreteUniform.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric, OverloadedStrings #-} 2 | -- | 3 | -- Module : Statistics.Distribution.DiscreteUniform 4 | -- Copyright : (c) 2016 André Szabolcs Szelp 5 | -- License : BSD3 6 | -- 7 | -- Maintainer : a.sz.szelp@gmail.com 8 | -- Stability : experimental 9 | -- Portability : portable 10 | -- 11 | -- The discrete uniform distribution. There are two parametrizations of 12 | -- this distribution. First is the probability distribution on an 13 | -- inclusive interval {1, ..., n}. This is parametrized with n only, 14 | -- where p_1, ..., p_n = 1/n. ('discreteUniform'). 15 | -- 16 | -- The second parametrization is the uniform distribution on {a, ..., b} with 17 | -- probabilities p_a, ..., p_b = 1/(a-b+1). This is parametrized with 18 | -- /a/ and /b/. 
('discreteUniformAB') 19 | 20 | module Statistics.Distribution.DiscreteUniform 21 | ( 22 | DiscreteUniform 23 | -- * Constructors 24 | , discreteUniform 25 | , discreteUniformAB 26 | -- * Accessors 27 | , rangeFrom 28 | , rangeTo 29 | ) where 30 | 31 | import Control.Applicative (empty) 32 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 33 | import Data.Binary (Binary(..)) 34 | import Data.Data (Data, Typeable) 35 | import System.Random.Stateful (uniformRM) 36 | import GHC.Generics (Generic) 37 | 38 | import qualified Statistics.Distribution as D 39 | import Statistics.Internal 40 | 41 | 42 | 43 | -- | The discrete uniform distribution. 44 | data DiscreteUniform = U { 45 | rangeFrom :: {-# UNPACK #-} !Int 46 | -- ^ /a/, the lower bound of the support {a, ..., b} 47 | , rangeTo :: {-# UNPACK #-} !Int 48 | -- ^ /b/, the upper bound of the support {a, ..., b} 49 | } deriving (Eq, Typeable, Data, Generic) 50 | 51 | instance Show DiscreteUniform where 52 | showsPrec i (U a b) = defaultShow2 "discreteUniformAB" a b i 53 | instance Read DiscreteUniform where 54 | readPrec = defaultReadPrecM2 "discreteUniformAB" (\a b -> Just (discreteUniformAB a b)) 55 | 56 | instance ToJSON DiscreteUniform 57 | instance FromJSON DiscreteUniform where 58 | parseJSON (Object v) = do 59 | a <- v .: "uniformA" 60 | b <- v .: "uniformB" 61 | return $ discreteUniformAB a b 62 | parseJSON _ = empty 63 | 64 | instance Binary DiscreteUniform where 65 | put (U a b) = put a >> put b 66 | get = discreteUniformAB <$> get <*> get 67 | 68 | instance D.Distribution DiscreteUniform where 69 | cumulative (U a b) x 70 | | x < fromIntegral a = 0 71 | | x > fromIntegral b = 1 72 | | otherwise = fromIntegral (floor x - a + 1) / fromIntegral (b - a + 1) 73 | 74 | instance D.DiscreteDistr DiscreteUniform where 75 | probability (U a b) k 76 | | k >= a && k <= b = 1 / fromIntegral (b - a + 1) 77 | | otherwise = 0 78 | 79 | instance D.Mean DiscreteUniform where 80 | mean (U a b) = fromIntegral (a+b)/2 
81 | 82 | instance D.Variance DiscreteUniform where 83 | variance (U a b) = (fromIntegral (b - a + 1)^(2::Int) - 1) / 12 84 | 85 | instance D.MaybeMean DiscreteUniform where 86 | maybeMean = Just . D.mean 87 | 88 | instance D.MaybeVariance DiscreteUniform where 89 | maybeStdDev = Just . D.stdDev 90 | maybeVariance = Just . D.variance 91 | 92 | instance D.Entropy DiscreteUniform where 93 | entropy (U a b) = log $ fromIntegral $ b - a + 1 94 | 95 | instance D.MaybeEntropy DiscreteUniform where 96 | maybeEntropy = Just . D.entropy 97 | 98 | instance D.ContGen DiscreteUniform where 99 | genContVar d = fmap fromIntegral . D.genDiscreteVar d 100 | 101 | instance D.DiscreteGen DiscreteUniform where 102 | genDiscreteVar (U a b) = uniformRM (a,b) 103 | 104 | -- | Construct discrete uniform distribution on support {1, ..., n}. 105 | -- Range /n/ must be >0. 106 | discreteUniform :: Int -- ^ Range 107 | -> DiscreteUniform 108 | discreteUniform n 109 | | n < 1 = error $ msg ++ "range must be > 0. Got " ++ show n 110 | | otherwise = U 1 n 111 | where msg = "Statistics.Distribution.DiscreteUniform.discreteUniform: " 112 | 113 | -- | Construct discrete uniform distribution on support {a, ..., b}. 
114 | discreteUniformAB :: Int -- ^ Lower boundary (inclusive) 115 | -> Int -- ^ Upper boundary (inclusive) 116 | -> DiscreteUniform 117 | discreteUniformAB a b 118 | | b < a = U b a 119 | | otherwise = U a b 120 | -------------------------------------------------------------------------------- /Statistics/Distribution/Exponential.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE MultiParamTypeClasses #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 4 | -- | 5 | -- Module : Statistics.Distribution.Exponential 6 | -- Copyright : (c) 2009 Bryan O'Sullivan 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : bos@serpentine.com 10 | -- Stability : experimental 11 | -- Portability : portable 12 | -- 13 | -- The exponential distribution. This is the continuous probability 14 | -- distribution of the times between events in a Poisson process, in 15 | -- which events occur continuously and independently at a constant 16 | -- average rate. 
17 | 18 | module Statistics.Distribution.Exponential 19 | ( 20 | ExponentialDistribution 21 | -- * Constructors 22 | , exponential 23 | , exponentialE 24 | -- * Accessors 25 | , edLambda 26 | ) where 27 | 28 | import Control.Applicative 29 | import Data.Aeson (FromJSON(..),ToJSON,Value(..),(.:)) 30 | import Data.Binary (Binary, put, get) 31 | import Data.Data (Data, Typeable) 32 | import GHC.Generics (Generic) 33 | import Numeric.SpecFunctions (log1p,expm1) 34 | import Numeric.MathFunctions.Constants (m_neg_inf) 35 | import qualified System.Random.MWC.Distributions as MWC 36 | 37 | import qualified Statistics.Distribution as D 38 | import qualified Statistics.Sample as S 39 | import Statistics.Internal 40 | 41 | 42 | 43 | newtype ExponentialDistribution = ED { 44 | edLambda :: Double 45 | } deriving (Eq, Typeable, Data, Generic) 46 | 47 | instance Show ExponentialDistribution where 48 | showsPrec n (ED l) = defaultShow1 "exponential" l n 49 | instance Read ExponentialDistribution where 50 | readPrec = defaultReadPrecM1 "exponential" exponentialE 51 | 52 | instance ToJSON ExponentialDistribution 53 | instance FromJSON ExponentialDistribution where 54 | parseJSON (Object v) = do 55 | l <- v .: "edLambda" 56 | maybe (fail $ errMsg l) return $ exponentialE l 57 | parseJSON _ = empty 58 | 59 | instance Binary ExponentialDistribution where 60 | put = put . 
edLambda 61 | get = do 62 | l <- get 63 | maybe (fail $ errMsg l) return $ exponentialE l 64 | 65 | instance D.Distribution ExponentialDistribution where 66 | cumulative = cumulative 67 | complCumulative = complCumulative 68 | 69 | instance D.ContDistr ExponentialDistribution where 70 | density (ED l) x 71 | | x < 0 = 0 72 | | otherwise = l * exp (-l * x) 73 | logDensity (ED l) x 74 | | x < 0 = m_neg_inf 75 | | otherwise = log l + (-l * x) 76 | quantile = quantile 77 | complQuantile = complQuantile 78 | 79 | instance D.Mean ExponentialDistribution where 80 | mean (ED l) = 1 / l 81 | 82 | instance D.Variance ExponentialDistribution where 83 | variance (ED l) = 1 / (l * l) 84 | 85 | instance D.MaybeMean ExponentialDistribution where 86 | maybeMean = Just . D.mean 87 | 88 | instance D.MaybeVariance ExponentialDistribution where 89 | maybeStdDev = Just . D.stdDev 90 | maybeVariance = Just . D.variance 91 | 92 | instance D.Entropy ExponentialDistribution where 93 | entropy (ED l) = 1 - log l 94 | 95 | instance D.MaybeEntropy ExponentialDistribution where 96 | maybeEntropy = Just . D.entropy 97 | 98 | instance D.ContGen ExponentialDistribution where 99 | genContVar = MWC.exponential . edLambda 100 | 101 | cumulative :: ExponentialDistribution -> Double -> Double 102 | cumulative (ED l) x | x <= 0 = 0 103 | | otherwise = - expm1 (-l * x) 104 | 105 | complCumulative :: ExponentialDistribution -> Double -> Double 106 | complCumulative (ED l) x | x <= 0 = 1 107 | | otherwise = exp (-l * x) 108 | 109 | 110 | quantile :: ExponentialDistribution -> Double -> Double 111 | quantile (ED l) p 112 | | p >= 0 && p <= 1 = - log1p(-p) / l 113 | | otherwise = 114 | error $ "Statistics.Distribution.Exponential.quantile: p must be in [0,1] range. 
Got: "++show p 115 | 116 | complQuantile :: ExponentialDistribution -> Double -> Double 117 | complQuantile (ED l) p 118 | | p == 0 = 0 119 | | p >= 0 && p < 1 = -log p / l 120 | | otherwise = 121 | error $ "Statistics.Distribution.Exponential.quantile: p must be in [0,1] range. Got: "++show p 122 | 123 | -- | Create an exponential distribution. 124 | exponential :: Double -- ^ Rate parameter. 125 | -> ExponentialDistribution 126 | exponential l = maybe (error $ errMsg l) id $ exponentialE l 127 | 128 | -- | Create an exponential distribution. 129 | exponentialE :: Double -- ^ Rate parameter. 130 | -> Maybe ExponentialDistribution 131 | exponentialE l 132 | | l > 0 = Just (ED l) 133 | | otherwise = Nothing 134 | 135 | errMsg :: Double -> String 136 | errMsg l = "Statistics.Distribution.Exponential.exponential: scale parameter must be positive. Got " ++ show l 137 | 138 | -- | Create exponential distribution from sample. Estimates the rate 139 | -- with the maximum likelihood estimator, which is biased. Returns 140 | -- @Nothing@ if the sample mean does not exist or is not positive. 
141 | instance D.FromSample ExponentialDistribution Double where 142 | fromSample xs = let m = S.mean xs 143 | in if m > 0 then Just (ED (1/m)) else Nothing 144 | -------------------------------------------------------------------------------- /Statistics/Distribution/FDistribution.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | -- | 4 | -- Module : Statistics.Distribution.FDistribution 5 | -- Copyright : (c) 2011 Aleksey Khudyakov 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- Fisher F distribution 13 | module Statistics.Distribution.FDistribution ( 14 | FDistribution 15 | -- * Constructors 16 | , fDistribution 17 | , fDistributionE 18 | , fDistributionReal 19 | , fDistributionRealE 20 | -- * Accessors 21 | , fDistributionNDF1 22 | , fDistributionNDF2 23 | ) where 24 | 25 | import Control.Applicative 26 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 27 | import Data.Binary (Binary(..)) 28 | import Data.Data (Data, Typeable) 29 | import GHC.Generics (Generic) 30 | import Numeric.SpecFunctions ( 31 | logBeta, incompleteBeta, invIncompleteBeta, digamma) 32 | import Numeric.MathFunctions.Constants (m_neg_inf) 33 | 34 | import qualified Statistics.Distribution as D 35 | import Statistics.Function (square) 36 | import Statistics.Internal 37 | 38 | 39 | -- | F distribution 40 | data FDistribution = F { fDistributionNDF1 :: {-# UNPACK #-} !Double 41 | , fDistributionNDF2 :: {-# UNPACK #-} !Double 42 | , _pdfFactor :: {-# UNPACK #-} !Double 43 | } 44 | deriving (Eq, Typeable, Data, Generic) 45 | 46 | instance Show FDistribution where 47 | showsPrec i (F n m _) = defaultShow2 "fDistributionReal" n m i 48 | instance Read FDistribution where 49 | readPrec = defaultReadPrecM2 "fDistributionReal" fDistributionRealE 50 | 51 | instance 
ToJSON FDistribution 52 | instance FromJSON FDistribution where 53 | parseJSON (Object v) = do 54 | n <- v .: "fDistributionNDF1" 55 | m <- v .: "fDistributionNDF2" 56 | maybe (fail $ errMsgR n m) return $ fDistributionRealE n m 57 | parseJSON _ = empty 58 | 59 | instance Binary FDistribution where 60 | put (F n m _) = put n >> put m 61 | get = do 62 | n <- get 63 | m <- get 64 | maybe (fail $ errMsgR n m) return $ fDistributionRealE n m 65 | 66 | fDistribution :: Int -> Int -> FDistribution 67 | fDistribution n m = maybe (error $ errMsg n m) id $ fDistributionE n m 68 | 69 | fDistributionReal :: Double -> Double -> FDistribution 70 | fDistributionReal n m = maybe (error $ errMsgR n m) id $ fDistributionRealE n m 71 | 72 | fDistributionE :: Int -> Int -> Maybe FDistribution 73 | fDistributionE n m 74 | | n > 0 && m > 0 = 75 | let n' = fromIntegral n 76 | m' = fromIntegral m 77 | f' = 0.5 * (log m' * m' + log n' * n') - logBeta (0.5*n') (0.5*m') 78 | in Just $ F n' m' f' 79 | | otherwise = Nothing 80 | 81 | fDistributionRealE :: Double -> Double -> Maybe FDistribution 82 | fDistributionRealE n m 83 | | n > 0 && m > 0 = 84 | let f' = 0.5 * (log m * m + log n * n) - logBeta (0.5*n) (0.5*m) 85 | in Just $ F n m f' 86 | | otherwise = Nothing 87 | 88 | errMsg :: Int -> Int -> String 89 | errMsg _ _ = "Statistics.Distribution.FDistribution.fDistribution: non-positive number of degrees of freedom" 90 | 91 | errMsgR :: Double -> Double -> String 92 | errMsgR _ _ = "Statistics.Distribution.FDistribution.fDistribution: non-positive number of degrees of freedom" 93 | 94 | 95 | 96 | instance D.Distribution FDistribution where 97 | cumulative = cumulative 98 | complCumulative = complCumulative 99 | 100 | instance D.ContDistr FDistribution where 101 | density d x 102 | | x <= 0 = 0 103 | | otherwise = exp $ logDensity d x 104 | logDensity d x 105 | | x <= 0 = m_neg_inf 106 | | otherwise = logDensity d x 107 | quantile = quantile 108 | 109 | cumulative :: FDistribution -> Double 
-> Double 110 | cumulative (F n m _) x 111 | | x <= 0 = 0 112 | -- Only matches +∞ 113 | | isInfinite x = 1 114 | -- NOTE: Here we rely on implementation detail of incompleteBeta. It 115 | -- computes using series expansion for sufficiently small x 116 | -- and uses following identity otherwise: 117 | -- 118 | -- I(x; a, b) = 1 - I(1-x; b, a) 119 | -- 120 | -- Point is we can compute 1-x as m/(m+y) without loss of 121 | -- precision for large x. Sadly this switchover point is 122 | -- implementation detail. 123 | | n >= (n+m)*bx = incompleteBeta (0.5 * n) (0.5 * m) bx 124 | | otherwise = 1 - incompleteBeta (0.5 * m) (0.5 * n) bx1 125 | where 126 | y = n * x 127 | bx = y / (m + y) 128 | bx1 = m / (m + y) 129 | 130 | complCumulative :: FDistribution -> Double -> Double 131 | complCumulative (F n m _) x 132 | | x <= 0 = 1 133 | -- Only matches +∞ 134 | | isInfinite x = 0 135 | -- See NOTE at cumulative 136 | | m >= (n+m)*bx = incompleteBeta (0.5 * m) (0.5 * n) bx 137 | | otherwise = 1 - incompleteBeta (0.5 * n) (0.5 * m) bx1 138 | where 139 | y = n*x 140 | bx = m / (m + y) 141 | bx1 = y / (m + y) 142 | 143 | logDensity :: FDistribution -> Double -> Double 144 | logDensity (F n m fac) x 145 | = fac + log x * (0.5 * n - 1) - log(m + n*x) * 0.5 * (n + m) 146 | 147 | quantile :: FDistribution -> Double -> Double 148 | quantile (F n m _) p 149 | | p >= 0 && p <= 1 = 150 | let x = invIncompleteBeta (0.5 * n) (0.5 * m) p 151 | in m * x / (n * (1 - x)) 152 | | otherwise = 153 | error $ "Statistics.Distribution.Uniform.quantile: p must be in [0,1] range. 
Got: "++show p 154 | 155 | 156 | instance D.MaybeMean FDistribution where 157 | maybeMean (F _ m _) | m > 2 = Just $ m / (m - 2) 158 | | otherwise = Nothing 159 | 160 | instance D.MaybeVariance FDistribution where 161 | maybeStdDev (F n m _) 162 | | m > 4 = Just $ 2 * square m * (m + n - 2) / (n * square (m - 2) * (m - 4)) 163 | | otherwise = Nothing 164 | 165 | instance D.Entropy FDistribution where 166 | entropy (F n m _) = 167 | let nHalf = 0.5 * n 168 | mHalf = 0.5 * m in 169 | log (n/m) 170 | + logBeta nHalf mHalf 171 | + (1 - nHalf) * digamma nHalf 172 | - (1 + mHalf) * digamma mHalf 173 | + (nHalf + mHalf) * digamma (nHalf + mHalf) 174 | 175 | instance D.MaybeEntropy FDistribution where 176 | maybeEntropy = Just . D.entropy 177 | 178 | instance D.ContGen FDistribution where 179 | genContVar = D.genContinuous 180 | -------------------------------------------------------------------------------- /Statistics/Distribution/Hypergeometric.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | -- | 4 | -- Module : Statistics.Distribution.Hypergeometric 5 | -- Copyright : (c) 2009 Bryan O'Sullivan 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- The Hypergeometric distribution. This is the discrete probability 13 | -- distribution that measures the probability of /k/ successes in /l/ 14 | -- trials, without replacement, from a finite population. 15 | -- 16 | -- The parameters of the distribution describe /k/ elements chosen 17 | -- from a population of /l/, with /m/ elements of one type, and 18 | -- /l/-/m/ of the other (all are positive integers). 
19 | 20 | module Statistics.Distribution.Hypergeometric 21 | ( 22 | HypergeometricDistribution 23 | -- * Constructors 24 | , hypergeometric 25 | , hypergeometricE 26 | -- ** Accessors 27 | , hdM 28 | , hdL 29 | , hdK 30 | ) where 31 | 32 | import Control.Applicative 33 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 34 | import Data.Binary (Binary(..)) 35 | import Data.Data (Data, Typeable) 36 | import GHC.Generics (Generic) 37 | import Numeric.MathFunctions.Constants (m_epsilon,m_neg_inf) 38 | import Numeric.SpecFunctions (choose,logChoose) 39 | 40 | import qualified Statistics.Distribution as D 41 | import Statistics.Internal 42 | 43 | 44 | data HypergeometricDistribution = HD { 45 | hdM :: {-# UNPACK #-} !Int 46 | , hdL :: {-# UNPACK #-} !Int 47 | , hdK :: {-# UNPACK #-} !Int 48 | } deriving (Eq, Typeable, Data, Generic) 49 | 50 | instance Show HypergeometricDistribution where 51 | showsPrec i (HD m l k) = defaultShow3 "hypergeometric" m l k i 52 | instance Read HypergeometricDistribution where 53 | readPrec = defaultReadPrecM3 "hypergeometric" hypergeometricE 54 | 55 | instance ToJSON HypergeometricDistribution 56 | instance FromJSON HypergeometricDistribution where 57 | parseJSON (Object v) = do 58 | m <- v .: "hdM" 59 | l <- v .: "hdL" 60 | k <- v .: "hdK" 61 | maybe (fail $ errMsg m l k) return $ hypergeometricE m l k 62 | parseJSON _ = empty 63 | 64 | instance Binary HypergeometricDistribution where 65 | put (HD m l k) = put m >> put l >> put k 66 | get = do 67 | m <- get 68 | l <- get 69 | k <- get 70 | maybe (fail $ errMsg m l k) return $ hypergeometricE m l k 71 | 72 | instance D.Distribution HypergeometricDistribution where 73 | cumulative = cumulative 74 | complCumulative = complCumulative 75 | 76 | instance D.DiscreteDistr HypergeometricDistribution where 77 | probability = probability 78 | logProbability = logProbability 79 | 80 | instance D.Mean HypergeometricDistribution where 81 | mean = mean 82 | 83 | instance D.Variance 
HypergeometricDistribution where 84 | variance = variance 85 | 86 | instance D.MaybeMean HypergeometricDistribution where 87 | maybeMean = Just . D.mean 88 | 89 | instance D.MaybeVariance HypergeometricDistribution where 90 | maybeStdDev = Just . D.stdDev 91 | maybeVariance = Just . D.variance 92 | 93 | instance D.Entropy HypergeometricDistribution where 94 | entropy = directEntropy 95 | 96 | instance D.MaybeEntropy HypergeometricDistribution where 97 | maybeEntropy = Just . D.entropy 98 | 99 | variance :: HypergeometricDistribution -> Double 100 | variance (HD m l k) = (k' * ml) * (1 - ml) * (l' - k') / (l' - 1) 101 | where m' = fromIntegral m 102 | l' = fromIntegral l 103 | k' = fromIntegral k 104 | ml = m' / l' 105 | 106 | mean :: HypergeometricDistribution -> Double 107 | mean (HD m l k) = fromIntegral k * fromIntegral m / fromIntegral l 108 | 109 | directEntropy :: HypergeometricDistribution -> Double 110 | directEntropy d@(HD m _ _) 111 | = negate . sum 112 | $ takeWhile (< negate m_epsilon) 113 | $ dropWhile (not . 
(< negate m_epsilon))
    -- Sum x·log x over [0..m]; the take/drop pair above trims the
    -- zero-probability tails, whose terms evaluate to NaN (0 · log 0).
    [ let x = probability d n in x * log x | n <- [0..m]]


-- | Create hypergeometric distribution.  Calls 'error' on invalid
--   parameters; see 'hypergeometricE' for the accepted ranges.
hypergeometric :: Int               -- ^ /m/
               -> Int               -- ^ /l/
               -> Int               -- ^ /k/
               -> HypergeometricDistribution
hypergeometric m l k
  = maybe (error $ errMsg m l k) id $ hypergeometricE m l k

-- | Create hypergeometric distribution.  Returns 'Nothing' unless
--   l > 0, m ∈ [0,l] and k ∈ (0,l].
hypergeometricE :: Int               -- ^ /m/
                -> Int               -- ^ /l/
                -> Int               -- ^ /k/
                -> Maybe HypergeometricDistribution
hypergeometricE m l k
  | not (l > 0)            = Nothing
  | not (m >= 0 && m <= l) = Nothing
  | not (k > 0 && k <= l)  = Nothing
  | otherwise              = Just (HD m l k)


-- Shared error text for the constructor and the JSON/Binary decoders.
errMsg :: Int -> Int -> Int -> String
errMsg m l k
  =  "Statistics.Distribution.Hypergeometric.hypergeometric:"
  ++ " m=" ++ show m
  ++ " l=" ++ show l
  ++ " k=" ++ show k
  ++ " should hold: l>0 & m in [0,l] & k in (0,l]"

-- Naive implementation
probability :: HypergeometricDistribution -> Int -> Double
probability (HD mi li ki) n
    -- zero outside the support [max 0 (m+k-l), min m k]
  | n < max 0 (mi+ki-li) || n > min mi ki = 0
  -- No overflow
  | li < 1000 = choose mi n * choose (li - mi) (ki - n)
              / choose li ki
    -- for larger populations work with log-binomials to avoid overflow
  | otherwise = exp $ logChoose mi n
                    + logChoose (li - mi) (ki - n)
                    - logChoose li ki

logProbability :: HypergeometricDistribution -> Int -> Double
logProbability (HD mi li ki) n
  | n < max 0 (mi+ki-li) || n > min mi ki = m_neg_inf
  | otherwise = logChoose mi n
              + logChoose (li - mi) (ki - n)
              - logChoose li ki

-- CDF by direct summation of probabilities over the support.
cumulative :: HypergeometricDistribution -> Double -> Double
cumulative d@(HD mi li ki) x
  | isNaN x      = error "Statistics.Distribution.Hypergeometric.cumulative: NaN argument"
  | isInfinite x = if x > 0 then 1 else 0
  | n < minN     = 0
  | n >= maxN    = 1
  | otherwise    = D.sumProbabilities d minN n
  where
    n    = floor x
    minN = max 0 (mi+ki-li)
    maxN = min mi ki

complCumulative ::
HypergeometricDistribution -> Double -> Double 174 | complCumulative d@(HD mi li ki) x 175 | | isNaN x = error "Statistics.Distribution.Hypergeometric.complCumulative: NaN argument" 176 | | isInfinite x = if x > 0 then 0 else 1 177 | | n < minN = 1 178 | | n >= maxN = 0 179 | | otherwise = D.sumProbabilities d (n + 1) maxN 180 | where 181 | n = floor x 182 | minN = max 0 (mi+ki-li) 183 | maxN = min mi ki 184 | -------------------------------------------------------------------------------- /Statistics/Distribution/Laplace.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE MultiParamTypeClasses #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 4 | -- | 5 | -- Module : Statistics.Distribution.Laplace 6 | -- Copyright : (c) 2015 Mihai Maruseac 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : mihai.maruseac@maruseac.com 10 | -- Stability : experimental 11 | -- Portability : portable 12 | -- 13 | -- The Laplace distribution. This is the continuous probability 14 | -- defined as the difference of two iid exponential random variables 15 | -- or a Brownian motion evaluated as exponentially distributed times. 
16 | -- It is used in differential privacy (Laplace Method), speech 17 | -- recognition and least absolute deviations method (Laplace's first 18 | -- law of errors, giving a robust regression method) 19 | -- 20 | module Statistics.Distribution.Laplace 21 | ( 22 | LaplaceDistribution 23 | -- * Constructors 24 | , laplace 25 | , laplaceE 26 | -- * Accessors 27 | , ldLocation 28 | , ldScale 29 | ) where 30 | 31 | import Control.Applicative 32 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 33 | import Data.Binary (Binary(..)) 34 | import Data.Data (Data, Typeable) 35 | import GHC.Generics (Generic) 36 | import qualified Data.Vector.Generic as G 37 | import qualified Statistics.Distribution as D 38 | import qualified Statistics.Quantile as Q 39 | import qualified Statistics.Sample as S 40 | import Statistics.Internal 41 | 42 | 43 | data LaplaceDistribution = LD { 44 | ldLocation :: {-# UNPACK #-} !Double 45 | -- ^ Location. 46 | , ldScale :: {-# UNPACK #-} !Double 47 | -- ^ Scale. 
48 | } deriving (Eq, Typeable, Data, Generic) 49 | 50 | instance Show LaplaceDistribution where 51 | showsPrec i (LD l s) = defaultShow2 "laplace" l s i 52 | instance Read LaplaceDistribution where 53 | readPrec = defaultReadPrecM2 "laplace" laplaceE 54 | 55 | instance ToJSON LaplaceDistribution 56 | instance FromJSON LaplaceDistribution where 57 | parseJSON (Object v) = do 58 | l <- v .: "ldLocation" 59 | s <- v .: "ldScale" 60 | maybe (fail $ errMsg l s) return $ laplaceE l s 61 | parseJSON _ = empty 62 | 63 | instance Binary LaplaceDistribution where 64 | put (LD l s) = put l >> put s 65 | get = do 66 | l <- get 67 | s <- get 68 | maybe (fail $ errMsg l s) return $ laplaceE l s 69 | 70 | instance D.Distribution LaplaceDistribution where 71 | cumulative = cumulative 72 | complCumulative = complCumulative 73 | 74 | instance D.ContDistr LaplaceDistribution where 75 | density (LD l s) x = exp (- abs (x - l) / s) / (2 * s) 76 | logDensity (LD l s) x = - abs (x - l) / s - log 2 - log s 77 | quantile = quantile 78 | complQuantile = complQuantile 79 | 80 | instance D.Mean LaplaceDistribution where 81 | mean (LD l _) = l 82 | 83 | instance D.Variance LaplaceDistribution where 84 | variance (LD _ s) = 2 * s * s 85 | 86 | instance D.MaybeMean LaplaceDistribution where 87 | maybeMean = Just . D.mean 88 | 89 | instance D.MaybeVariance LaplaceDistribution where 90 | maybeStdDev = Just . D.stdDev 91 | maybeVariance = Just . D.variance 92 | 93 | instance D.Entropy LaplaceDistribution where 94 | entropy (LD _ s) = 1 + log (2 * s) 95 | 96 | instance D.MaybeEntropy LaplaceDistribution where 97 | maybeEntropy = Just . 
D.entropy

instance D.ContGen LaplaceDistribution where
  genContVar = D.genContinuous

-- | CDF: ½·exp((x−l)/s) at or below the location, mirrored above it.
cumulative :: LaplaceDistribution -> Double -> Double
cumulative (LD l s) x
  | x <= l    = 0.5 * exp ( (x - l) / s)
  | otherwise = 1 - 0.5 * exp ( - (x - l) / s )

-- | Complementary CDF, computed directly rather than as @1 - cumulative@.
complCumulative :: LaplaceDistribution -> Double -> Double
complCumulative (LD l s) x
  | x <= l    = 1 - 0.5 * exp ( (x - l) / s)
  | otherwise = 0.5 * exp ( - (x - l) / s )

-- | Inverse CDF.  Defined on all of [0,1]; the endpoints map to ∓∞.
quantile :: LaplaceDistribution -> Double -> Double
quantile (LD l s) p
  | p == 0           = -inf
  | p == 1           = inf
  | p == 0.5         = l
  | p > 0 && p < 0.5 = l + s * log (2 * p)
  | p > 0.5 && p < 1 = l - s * log (2 - 2 * p)
  | otherwise =
    error $ "Statistics.Distribution.Laplace.quantile: p must be in [0,1] range. Got: "++show p
  where
    inf = 1 / 0

-- | Inverse complementary CDF.
complQuantile :: LaplaceDistribution -> Double -> Double
complQuantile (LD l s) p
  | p == 0           = inf
  | p == 1           = -inf
  | p == 0.5         = l
  | p > 0 && p < 0.5 = l - s * log (2 * p)
  | p > 0.5 && p < 1 = l + s * log (2 - 2 * p)
  | otherwise =
    -- FIX: this message previously blamed 'quantile'; name the right function.
    error $ "Statistics.Distribution.Laplace.complQuantile: p must be in [0,1] range. Got: "++show p
  where
    inf = 1 / 0

-- | Create a Laplace distribution.  Calls 'error' on an invalid scale.
laplace :: Double         -- ^ Location
        -> Double         -- ^ Scale
        -> LaplaceDistribution
laplace l s = maybe (error $ errMsg l s) id $ laplaceE l s

-- | Create a Laplace distribution.  Returns 'Nothing' when the scale is
--   negative or NaN; a zero scale is accepted (degenerate distribution).
laplaceE :: Double         -- ^ Location
         -> Double         -- ^ Scale
         -> Maybe LaplaceDistribution
laplaceE l s
  | s >= 0    = Just (LD l s)
  | otherwise = Nothing

-- FIX: wording now matches the @s >= 0@ check in 'laplaceE' ("positive"
-- previously contradicted the accepted zero scale).
errMsg :: Double -> Double -> String
errMsg _ s = "Statistics.Distribution.Laplace.laplace: scale parameter must be non-negative. Got " ++ show s


-- | Create Laplace distribution from sample.
The location is estimated 155 | -- as the median of the sample, and the scale as the mean absolute 156 | -- deviation of the median. 157 | instance D.FromSample LaplaceDistribution Double where 158 | fromSample xs 159 | | G.null xs = Nothing 160 | | otherwise = Just $! LD s l 161 | where 162 | s = Q.median Q.medianUnbiased xs 163 | l = S.mean $ G.map (\x -> abs $ x - s) xs 164 | -------------------------------------------------------------------------------- /Statistics/Distribution/Lognormal.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE MultiParamTypeClasses #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | -- | 4 | -- Module : Statistics.Distribution.Lognormal 5 | -- Copyright : (c) 2020 Ximin Luo 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : infinity0@pwned.gg 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- The log normal distribution. This is a continuous probability 13 | -- distribution that describes data whose log is clustered around a 14 | -- mean. For example, the multiplicative product of many independent 15 | -- positive random variables. 16 | 17 | module Statistics.Distribution.Lognormal 18 | ( 19 | LognormalDistribution 20 | -- * Constructors 21 | , lognormalDistr 22 | , lognormalDistrErr 23 | , lognormalDistrMeanStddevErr 24 | , lognormalStandard 25 | ) where 26 | 27 | import Data.Aeson (FromJSON, ToJSON) 28 | import Data.Binary (Binary (..)) 29 | import Data.Data (Data, Typeable) 30 | import GHC.Generics (Generic) 31 | import Numeric.MathFunctions.Constants (m_huge, m_sqrt_2_pi) 32 | import Numeric.SpecFunctions (expm1, log1p) 33 | import qualified Data.Vector.Generic as G 34 | 35 | import qualified Statistics.Distribution as D 36 | import qualified Statistics.Distribution.Normal as N 37 | import Statistics.Internal 38 | 39 | 40 | -- | The lognormal distribution. 
41 | newtype LognormalDistribution = LND N.NormalDistribution 42 | deriving (Eq, Typeable, Data, Generic) 43 | 44 | instance Show LognormalDistribution where 45 | showsPrec i (LND d) = defaultShow2 "lognormalDistr" m s i 46 | where 47 | m = D.mean d 48 | s = D.stdDev d 49 | instance Read LognormalDistribution where 50 | readPrec = defaultReadPrecM2 "lognormalDistr" $ 51 | (either (const Nothing) Just .) . lognormalDistrErr 52 | 53 | instance ToJSON LognormalDistribution 54 | instance FromJSON LognormalDistribution 55 | 56 | instance Binary LognormalDistribution where 57 | put (LND d) = put m >> put s 58 | where 59 | m = D.mean d 60 | s = D.stdDev d 61 | get = do 62 | m <- get 63 | sd <- get 64 | either fail return $ lognormalDistrErr m sd 65 | 66 | instance D.Distribution LognormalDistribution where 67 | cumulative = cumulative 68 | complCumulative = complCumulative 69 | 70 | instance D.ContDistr LognormalDistribution where 71 | logDensity = logDensity 72 | quantile = quantile 73 | complQuantile = complQuantile 74 | 75 | instance D.MaybeMean LognormalDistribution where 76 | maybeMean = Just . D.mean 77 | 78 | instance D.Mean LognormalDistribution where 79 | mean (LND d) = exp (m + v / 2) 80 | where 81 | m = D.mean d 82 | v = D.variance d 83 | 84 | instance D.MaybeVariance LognormalDistribution where 85 | maybeStdDev = Just . D.stdDev 86 | maybeVariance = Just . D.variance 87 | 88 | instance D.Variance LognormalDistribution where 89 | variance (LND d) = expm1 v * exp (2 * m + v) 90 | where 91 | m = D.mean d 92 | v = D.variance d 93 | 94 | instance D.Entropy LognormalDistribution where 95 | entropy (LND d) = logBase 2 (s * exp (m + 0.5) * m_sqrt_2_pi) 96 | where 97 | m = D.mean d 98 | s = D.stdDev d 99 | 100 | instance D.MaybeEntropy LognormalDistribution where 101 | maybeEntropy = Just . D.entropy 102 | 103 | instance D.ContGen LognormalDistribution where 104 | genContVar d = D.genContinuous d 105 | 106 | -- | Standard log normal distribution with mu 0 and sigma 1. 
107 | -- 108 | -- Mean is @sqrt e@ and variance is @(e - 1) * e@. 109 | lognormalStandard :: LognormalDistribution 110 | lognormalStandard = LND N.standard 111 | 112 | -- | Create log normal distribution from parameters. 113 | lognormalDistr 114 | :: Double -- ^ Mu 115 | -> Double -- ^ Sigma 116 | -> LognormalDistribution 117 | lognormalDistr mu sig = either error id $ lognormalDistrErr mu sig 118 | 119 | -- | Create log normal distribution from parameters. 120 | lognormalDistrErr 121 | :: Double -- ^ Mu 122 | -> Double -- ^ Sigma 123 | -> Either String LognormalDistribution 124 | lognormalDistrErr mu sig 125 | | sig >= sqrt (log m_huge - 2 * mu) = Left $ errMsg mu sig 126 | | otherwise = LND <$> N.normalDistrErr mu sig 127 | 128 | errMsg :: Double -> Double -> String 129 | errMsg mu sig = 130 | "Statistics.Distribution.Lognormal.lognormalDistr: sigma must be > 0 && < " 131 | ++ show lim ++ ". Got " ++ show sig 132 | where lim = sqrt (log m_huge - 2 * mu) 133 | 134 | -- | Create log normal distribution from mean and standard deviation. 135 | lognormalDistrMeanStddevErr 136 | :: Double -- ^ Mu 137 | -> Double -- ^ Sigma 138 | -> Either String LognormalDistribution 139 | lognormalDistrMeanStddevErr m sd = LND <$> N.normalDistrErr mu sig 140 | where r = sd / m 141 | sig2 = log1p (r * r) 142 | sig = sqrt sig2 143 | mu = log m - sig2 / 2 144 | 145 | -- | Variance is estimated using maximum likelihood method 146 | -- (biased estimation) over the log of the data. 147 | -- 148 | -- Returns @Nothing@ if sample contains less than one element or 149 | -- variance is zero (all elements are equal) 150 | instance D.FromSample LognormalDistribution Double where 151 | fromSample = fmap LND . D.fromSample . 
G.map log

-- | Log-density of the lognormal distribution.
--
--   FIX: for x ≤ 0 the distribution has no mass, so the log-density is
--   −∞; the previous code returned 0, which corresponds to density 1
--   (and made the derived @density@ report 1 outside the support).
logDensity :: LognormalDistribution -> Double -> Double
logDensity (LND d) x
  | x > 0     = let lx = log x in D.logDensity d lx - lx
  | otherwise = -1 / 0

-- | CDF: defer to the underlying normal distribution of @log x@.
cumulative :: LognormalDistribution -> Double -> Double
cumulative (LND d) x
  | x > 0     = D.cumulative d $ log x
  | otherwise = 0

complCumulative :: LognormalDistribution -> Double -> Double
complCumulative (LND d) x
  | x > 0     = D.complCumulative d $ log x
  | otherwise = 1

-- | Quantile: exponentiate the underlying normal quantile.
quantile :: LognormalDistribution -> Double -> Double
quantile (LND d) = exp . D.quantile d

complQuantile :: LognormalDistribution -> Double -> Double
complQuantile (LND d) = exp . D.complQuantile d
--------------------------------------------------------------------------------
/Statistics/Distribution/Poisson.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module    : Statistics.Distribution.Poisson
-- Copyright : (c) 2009, 2011 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- The Poisson distribution.  This is the discrete probability
-- distribution of a number of events occurring in a fixed interval if
-- these events occur with a known average rate, and occur
-- independently from each other within that interval.
16 | 17 | module Statistics.Distribution.Poisson 18 | ( 19 | PoissonDistribution 20 | -- * Constructors 21 | , poisson 22 | , poissonE 23 | -- * Accessors 24 | , poissonLambda 25 | -- * References 26 | -- $references 27 | ) where 28 | 29 | import Control.Applicative 30 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 31 | import Data.Binary (Binary(..)) 32 | import Data.Data (Data, Typeable) 33 | import GHC.Generics (Generic) 34 | import Numeric.SpecFunctions (incompleteGamma,logFactorial) 35 | import Numeric.MathFunctions.Constants (m_neg_inf) 36 | 37 | import qualified Statistics.Distribution as D 38 | import qualified Statistics.Distribution.Poisson.Internal as I 39 | import Statistics.Internal 40 | 41 | 42 | 43 | newtype PoissonDistribution = PD { 44 | poissonLambda :: Double 45 | } deriving (Eq, Typeable, Data, Generic) 46 | 47 | instance Show PoissonDistribution where 48 | showsPrec i (PD l) = defaultShow1 "poisson" l i 49 | instance Read PoissonDistribution where 50 | readPrec = defaultReadPrecM1 "poisson" poissonE 51 | 52 | instance ToJSON PoissonDistribution 53 | instance FromJSON PoissonDistribution where 54 | parseJSON (Object v) = do 55 | l <- v .: "poissonLambda" 56 | maybe (fail $ errMsg l) return $ poissonE l 57 | parseJSON _ = empty 58 | 59 | instance Binary PoissonDistribution where 60 | put = put . 
poissonLambda
  get = do
    l <- get
    maybe (fail $ errMsg l) return $ poissonE l

instance D.Distribution PoissonDistribution where
  cumulative (PD lambda) x
    | x < 0        = 0
    | isInfinite x = 1
    | isNaN x      = error "Statistics.Distribution.Poisson.cumulative: NaN input"
    -- P(X ≤ ⌊x⌋) via the regularized incomplete gamma function.
    | otherwise    = 1 - incompleteGamma (fromIntegral (floor x + 1 :: Int)) lambda

instance D.DiscreteDistr PoissonDistribution where
  probability (PD lambda) x = I.probability lambda (fromIntegral x)
  -- log P(X = i) = i·log λ − log i! − λ; −∞ outside the support.
  logProbability (PD lambda) i
    | i < 0     = m_neg_inf
    | otherwise = log lambda * fromIntegral i - logFactorial i - lambda

-- Mean and variance of a Poisson distribution both equal λ.
instance D.Variance PoissonDistribution where
  variance = poissonLambda

instance D.Mean PoissonDistribution where
  mean = poissonLambda

instance D.MaybeMean PoissonDistribution where
  maybeMean = Just . D.mean

instance D.MaybeVariance PoissonDistribution where
  maybeStdDev = Just . D.stdDev

instance D.Entropy PoissonDistribution where
  entropy (PD lambda) = I.poissonEntropy lambda

instance D.MaybeEntropy PoissonDistribution where
  maybeEntropy = Just . D.entropy

-- | Create Poisson distribution.  Calls 'error' for a negative lambda.
poisson :: Double -> PoissonDistribution
poisson l = maybe (error $ errMsg l) id $ poissonE l

-- | Create Poisson distribution.  Returns 'Nothing' for a negative lambda.
poissonE :: Double -> Maybe PoissonDistribution
poissonE l
  | l >= 0    = Just (PD l)
  | otherwise = Nothing

-- Shared error text for the constructor and the JSON/Binary decoders.
errMsg :: Double -> String
errMsg l = "Statistics.Distribution.Poisson.poisson: lambda must be non-negative. Got "
        ++ show l


-- $references
--
-- * Loader, C. (2000) Fast and Accurate Computation of Binomial
--   Probabilities.
-- * Adell, J., Lekuona, A., and Yu, Y.
(2010) Sharp Bounds on the 116 | -- Entropy of the Poisson Law and Related Quantities 117 | -- 118 | -------------------------------------------------------------------------------- /Statistics/Distribution/StudentT.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | -- | 4 | -- Module : Statistics.Distribution.StudentT 5 | -- Copyright : (c) 2011 Aleksey Khudyakov 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- Student-T distribution 13 | module Statistics.Distribution.StudentT ( 14 | StudentT 15 | -- * Constructors 16 | , studentT 17 | , studentTE 18 | , studentTUnstandardized 19 | -- * Accessors 20 | , studentTndf 21 | ) where 22 | 23 | import Control.Applicative 24 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 25 | import Data.Binary (Binary(..)) 26 | import Data.Data (Data, Typeable) 27 | import GHC.Generics (Generic) 28 | import Numeric.SpecFunctions ( 29 | logBeta, incompleteBeta, invIncompleteBeta, digamma, log1p) 30 | 31 | import qualified Statistics.Distribution as D 32 | import Statistics.Distribution.Transform (LinearTransform (..)) 33 | import Statistics.Internal 34 | 35 | 36 | -- | Student-T distribution 37 | newtype StudentT = StudentT { studentTndf :: Double } 38 | deriving (Eq, Typeable, Data, Generic) 39 | 40 | instance Show StudentT where 41 | showsPrec i (StudentT ndf) = defaultShow1 "studentT" ndf i 42 | instance Read StudentT where 43 | readPrec = defaultReadPrecM1 "studentT" studentTE 44 | 45 | instance ToJSON StudentT 46 | instance FromJSON StudentT where 47 | parseJSON (Object v) = do 48 | ndf <- v .: "studentTndf" 49 | maybe (fail $ errMsg ndf) return $ studentTE ndf 50 | parseJSON _ = empty 51 | 52 | instance Binary StudentT where 53 | put = put . 
studentTndf 54 | get = do 55 | ndf <- get 56 | maybe (fail $ errMsg ndf) return $ studentTE ndf 57 | 58 | -- | Create Student-T distribution. Number of parameters must be positive. 59 | studentT :: Double -> StudentT 60 | studentT ndf = maybe (error $ errMsg ndf) id $ studentTE ndf 61 | 62 | -- | Create Student-T distribution. Number of parameters must be positive. 63 | studentTE :: Double -> Maybe StudentT 64 | studentTE ndf 65 | | ndf > 0 = Just (StudentT ndf) 66 | | otherwise = Nothing 67 | 68 | errMsg :: Double -> String 69 | errMsg _ = modErr "studentT" "non-positive number of degrees of freedom" 70 | 71 | 72 | instance D.Distribution StudentT where 73 | cumulative = cumulative 74 | complCumulative = complCumulative 75 | 76 | instance D.ContDistr StudentT where 77 | density d@(StudentT ndf) x = exp (logDensityUnscaled d x) / sqrt ndf 78 | logDensity d@(StudentT ndf) x = logDensityUnscaled d x - log (sqrt ndf) 79 | quantile = quantile 80 | 81 | cumulative :: StudentT -> Double -> Double 82 | cumulative (StudentT ndf) x 83 | | x > 0 = 1 - 0.5 * ibeta 84 | | otherwise = 0.5 * ibeta 85 | where 86 | ibeta = incompleteBeta (0.5 * ndf) 0.5 (ndf / (ndf + x*x)) 87 | 88 | complCumulative :: StudentT -> Double -> Double 89 | complCumulative (StudentT ndf) x 90 | | x > 0 = 0.5 * ibeta 91 | | otherwise = 1 - 0.5 * ibeta 92 | where 93 | ibeta = incompleteBeta (0.5 * ndf) 0.5 (ndf / (ndf + x*x)) 94 | 95 | 96 | logDensityUnscaled :: StudentT -> Double -> Double 97 | logDensityUnscaled (StudentT ndf) x 98 | = log1p (x*x/ndf) * (-(0.5 * (1 + ndf))) 99 | - logBeta 0.5 (0.5 * ndf) 100 | 101 | quantile :: StudentT -> Double -> Double 102 | quantile (StudentT ndf) p 103 | | p >= 0 && p <= 1 = 104 | let x = invIncompleteBeta (0.5 * ndf) 0.5 (2 * min p (1 - p)) 105 | in case sqrt $ ndf * (1 - x) / x of 106 | r | p < 0.5 -> -r 107 | | otherwise -> r 108 | | otherwise = modErr "quantile" $ "p must be in [0,1] range. 
Got: "++show p 109 | 110 | 111 | instance D.MaybeMean StudentT where 112 | maybeMean (StudentT ndf) | ndf > 1 = Just 0 113 | | otherwise = Nothing 114 | 115 | instance D.MaybeVariance StudentT where 116 | maybeVariance (StudentT ndf) | ndf > 2 = Just $! ndf / (ndf - 2) 117 | | otherwise = Nothing 118 | 119 | instance D.Entropy StudentT where 120 | entropy (StudentT ndf) = 121 | 0.5 * (ndf+1) * (digamma ((1+ndf)/2) - digamma(ndf/2)) 122 | + log (sqrt ndf) 123 | + logBeta (ndf/2) 0.5 124 | 125 | instance D.MaybeEntropy StudentT where 126 | maybeEntropy = Just . D.entropy 127 | 128 | instance D.ContGen StudentT where 129 | genContVar = D.genContinuous 130 | 131 | -- | Create an unstandardized Student-t distribution. 132 | studentTUnstandardized :: Double -- ^ Number of degrees of freedom 133 | -> Double -- ^ Central value (0 for standard Student T distribution) 134 | -> Double -- ^ Scale parameter 135 | -> LinearTransform StudentT 136 | studentTUnstandardized ndf mu sigma 137 | | sigma > 0 = LinearTransform mu sigma $ studentT ndf 138 | | otherwise = modErr "studentTUnstandardized" $ "sigma must be > 0. Got: " ++ show sigma 139 | 140 | modErr :: String -> String -> a 141 | modErr fun msg = error $ "Statistics.Distribution.StudentT." ++ fun ++ ": " ++ msg 142 | -------------------------------------------------------------------------------- /Statistics/Distribution/Transform.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric, FlexibleContexts, 2 | FlexibleInstances, UndecidableInstances #-} 3 | -- | 4 | -- Module : Statistics.Distribution.Transform 5 | -- Copyright : (c) 2013 John McDonnell; 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- Transformations over distributions 13 | module Statistics.Distribution.Transform 14 | ( 15 | LinearTransform (..) 
16 | , linTransFixedPoint 17 | , scaleAround 18 | ) where 19 | 20 | import Data.Aeson (FromJSON, ToJSON) 21 | import Data.Binary (Binary) 22 | import Data.Binary (put, get) 23 | import Data.Data (Data, Typeable) 24 | import GHC.Generics (Generic) 25 | import qualified Statistics.Distribution as D 26 | 27 | -- | Linear transformation applied to distribution. 28 | -- 29 | -- > LinearTransform μ σ _ 30 | -- > x' = μ + σ·x 31 | data LinearTransform d = LinearTransform 32 | { linTransLocation :: {-# UNPACK #-} !Double 33 | -- ^ Location parameter. 34 | , linTransScale :: {-# UNPACK #-} !Double 35 | -- ^ Scale parameter. 36 | , linTransDistr :: d 37 | -- ^ Distribution being transformed. 38 | } deriving (Eq, Show, Read, Typeable, Data, Generic) 39 | 40 | instance (FromJSON d) => FromJSON (LinearTransform d) 41 | instance (ToJSON d) => ToJSON (LinearTransform d) 42 | 43 | instance (Binary d) => Binary (LinearTransform d) where 44 | get = LinearTransform <$> get <*> get <*> get 45 | put (LinearTransform x y z) = put x >> put y >> put z 46 | 47 | -- | Apply linear transformation to distribution. 
scaleAround :: Double            -- ^ Fixed point
            -> Double            -- ^ Scale parameter
            -> d                 -- ^ Distribution
            -> LinearTransform d
-- The location μ = x0·(1 − σ) makes x0 a fixed point: x0 ↦ μ + σ·x0 = x0.
scaleAround x0 sc = LinearTransform (x0 * (1 - sc)) sc

-- | Get fixed point of linear transformation
linTransFixedPoint :: LinearTransform d -> Double
linTransFixedPoint (LinearTransform loc sc _) = loc / (1 - sc)

instance Functor LinearTransform where
  fmap f (LinearTransform loc sc dist) = LinearTransform loc sc (f dist)

instance D.Distribution d => D.Distribution (LinearTransform d) where
  -- Pull x back through the transform: x = μ + σ·x'  ⇒  x' = (x − μ)/σ.
  cumulative (LinearTransform loc sc dist) x = D.cumulative dist $ (x-loc) / sc

instance D.ContDistr d => D.ContDistr (LinearTransform d) where
  -- Change of variables divides the density by the Jacobian σ …
  density (LinearTransform loc sc dist) x = D.density dist ((x-loc) / sc) / sc
  -- … which in log space becomes a subtraction of log σ.
  logDensity (LinearTransform loc sc dist) x = D.logDensity dist ((x-loc) / sc) - log sc
  quantile (LinearTransform loc sc dist) p = loc + sc * D.quantile dist p
  complQuantile (LinearTransform loc sc dist) p = loc + sc * D.complQuantile dist p

instance D.MaybeMean d => D.MaybeMean (LinearTransform d) where
  maybeMean (LinearTransform loc _ dist) = (+loc) <$> D.maybeMean dist

instance (D.Mean d) => D.Mean (LinearTransform d) where
  mean (LinearTransform loc _ dist) = loc + D.mean dist

instance D.MaybeVariance d => D.MaybeVariance (LinearTransform d) where
  -- Variance scales with σ²; the standard deviation with σ.
  maybeVariance (LinearTransform _ sc dist) = (*(sc*sc)) <$> D.maybeVariance dist
  maybeStdDev (LinearTransform _ sc dist) = (*sc) <$> D.maybeStdDev dist

instance (D.Variance d) => D.Variance (LinearTransform d) where
  variance (LinearTransform _ sc dist) = sc * sc * D.variance dist
  stdDev (LinearTransform _ sc dist) = sc * D.stdDev dist

instance (D.MaybeEntropy d) => D.MaybeEntropy (LinearTransform d) where
  -- NOTE(review): entropy is passed through unchanged here; for a
  -- continuous distribution the differential entropy of a scaled
  -- variable shifts by log σ — confirm this pass-through is intended.
  maybeEntropy (LinearTransform _ _ dist) = D.maybeEntropy dist

instance (D.Entropy d) => D.Entropy
(LinearTransform d) where 88 | entropy (LinearTransform _ _ dist) = D.entropy dist 89 | 90 | instance D.ContGen d => D.ContGen (LinearTransform d) where 91 | genContVar (LinearTransform loc sc d) g = do 92 | x <- D.genContVar d g 93 | return $! loc + sc * x 94 | -------------------------------------------------------------------------------- /Statistics/Distribution/Uniform.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | -- | 4 | -- Module : Statistics.Distribution.Uniform 5 | -- Copyright : (c) 2011 Aleksey Khudyakov 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- Variate distributed uniformly in the interval. 13 | module Statistics.Distribution.Uniform 14 | ( 15 | UniformDistribution 16 | -- * Constructors 17 | , uniformDistr 18 | , uniformDistrE 19 | -- ** Accessors 20 | , uniformA 21 | , uniformB 22 | ) where 23 | 24 | import Control.Applicative 25 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 26 | import Data.Binary (Binary(..)) 27 | import Data.Data (Data, Typeable) 28 | import System.Random.Stateful (uniformRM) 29 | import GHC.Generics (Generic) 30 | 31 | import qualified Statistics.Distribution as D 32 | import Statistics.Internal 33 | 34 | 35 | 36 | -- | Uniform distribution from A to B 37 | data UniformDistribution = UniformDistribution { 38 | uniformA :: {-# UNPACK #-} !Double -- ^ Low boundary of distribution 39 | , uniformB :: {-# UNPACK #-} !Double -- ^ Upper boundary of distribution 40 | } deriving (Eq, Typeable, Data, Generic) 41 | 42 | instance Show UniformDistribution where 43 | showsPrec i (UniformDistribution a b) = defaultShow2 "uniformDistr" a b i 44 | instance Read UniformDistribution where 45 | readPrec = defaultReadPrecM2 "uniformDistr" uniformDistrE 46 | 47 | instance ToJSON UniformDistribution 
48 | instance FromJSON UniformDistribution where 49 | parseJSON (Object v) = do 50 | a <- v .: "uniformA" 51 | b <- v .: "uniformB" 52 | maybe (fail errMsg) return $ uniformDistrE a b 53 | parseJSON _ = empty 54 | 55 | instance Binary UniformDistribution where 56 | put (UniformDistribution x y) = put x >> put y 57 | get = do 58 | a <- get 59 | b <- get 60 | maybe (fail errMsg) return $ uniformDistrE a b 61 | 62 | -- | Create uniform distribution. 63 | uniformDistr :: Double -> Double -> UniformDistribution 64 | uniformDistr a b = maybe (error errMsg) id $ uniformDistrE a b 65 | 66 | -- | Create uniform distribution. 67 | uniformDistrE :: Double -> Double -> Maybe UniformDistribution 68 | uniformDistrE a b 69 | | b < a = Just $ UniformDistribution b a 70 | | a < b = Just $ UniformDistribution a b 71 | | otherwise = Nothing 72 | -- NOTE: failure is in default branch to guard against NaNs. 73 | 74 | errMsg :: String 75 | errMsg = "Statistics.Distribution.Uniform.uniform: wrong parameters" 76 | 77 | 78 | instance D.Distribution UniformDistribution where 79 | cumulative (UniformDistribution a b) x 80 | | x < a = 0 81 | | x > b = 1 82 | | otherwise = (x - a) / (b - a) 83 | 84 | instance D.ContDistr UniformDistribution where 85 | density (UniformDistribution a b) x 86 | | x < a = 0 87 | | x > b = 0 88 | | otherwise = 1 / (b - a) 89 | quantile (UniformDistribution a b) p 90 | | p >= 0 && p <= 1 = a + (b - a) * p 91 | | otherwise = 92 | error $ "Statistics.Distribution.Uniform.quantile: p must be in [0,1] range. Got: "++show p 93 | complQuantile (UniformDistribution a b) p 94 | | p >= 0 && p <= 1 = b + (a - b) * p 95 | | otherwise = 96 | error $ "Statistics.Distribution.Uniform.complQuantile: p must be in [0,1] range. 
Got: "++show p 97 | 98 | instance D.Mean UniformDistribution where 99 | mean (UniformDistribution a b) = 0.5 * (a + b) 100 | 101 | instance D.Variance UniformDistribution where 102 | -- NOTE: 1/sqrt 12 is not constant folded (#4101) so it's written as 103 | -- numerical constant. (Also FIXME!) 104 | stdDev (UniformDistribution a b) = 0.2886751345948129 * (b - a) 105 | variance (UniformDistribution a b) = d * d / 12 where d = b - a 106 | 107 | instance D.MaybeMean UniformDistribution where 108 | maybeMean = Just . D.mean 109 | 110 | instance D.MaybeVariance UniformDistribution where 111 | maybeStdDev = Just . D.stdDev 112 | 113 | instance D.Entropy UniformDistribution where 114 | entropy (UniformDistribution a b) = log (b - a) 115 | 116 | instance D.MaybeEntropy UniformDistribution where 117 | maybeEntropy = Just . D.entropy 118 | 119 | instance D.ContGen UniformDistribution where 120 | genContVar (UniformDistribution a b) = uniformRM (a,b) 121 | -------------------------------------------------------------------------------- /Statistics/Function.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, CPP, FlexibleContexts, Rank2Types #-} 2 | {-# OPTIONS_GHC -fsimpl-tick-factor=200 #-} 3 | -- | 4 | -- Module : Statistics.Function 5 | -- Copyright : (c) 2009, 2010, 2011 Bryan O'Sullivan 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- Useful functions. 

module Statistics.Function
    (
    -- * Scanning
      minMax
    -- * Sorting
    , sort
    , gsort
    , sortBy
    , partialSort
    -- * Indexing
    , indexed
    , indices
    -- * Bit twiddling
    , nextHighestPowerOfTwo
    -- * Comparison
    , within
    -- * Arithmetic
    , square
    -- * Vectors
    , unsafeModify
    -- * Combinators
    , for
    , rfor
    ) where

#include "MachDeps.h"

import Control.Monad.ST (ST)
import Data.Bits ((.|.), shiftR)
import qualified Data.Vector.Algorithms.Intro as I
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Unboxed.Mutable as M
import Numeric.MathFunctions.Comparison (within)

-- | Sort a vector (monomorphic version for unboxed 'Double' vectors).
-- NOTE(review): marked NOINLINE, presumably to share a single compiled
-- copy — confirm before changing.
sort :: U.Vector Double -> U.Vector Double
sort = G.modify I.sort
{-# NOINLINE sort #-}

-- | Sort a vector.  Generic over vector and element type; delegates to
-- 'Data.Vector.Algorithms.Intro.sort'.
gsort :: (Ord e, G.Vector v e) => v e -> v e
gsort = G.modify I.sort
{-# INLINE gsort #-}

-- | Sort a vector using a custom ordering.
sortBy :: (G.Vector v e) => I.Comparison e -> v e -> v e
sortBy f = G.modify $ I.sortBy f
{-# INLINE sortBy #-}

-- | Partially sort a vector, such that the least /k/ elements will be
-- at the front.  Delegates to 'Data.Vector.Algorithms.Intro.partialSort'.
partialSort :: (G.Vector v e, Ord e) =>
               Int   -- ^ The number /k/ of least elements.
            -> v e
            -> v e
partialSort k = G.modify (`I.partialSort` k)
{-# SPECIALIZE partialSort :: Int -> U.Vector Double -> U.Vector Double #-}

-- | Return the indices of a vector.
indices :: (G.Vector v a, G.Vector v Int) => v a -> v Int
indices a = G.enumFromTo 0 (G.length a - 1)
{-# INLINE indices #-}

-- | Zip a vector with its indices.
indexed :: (G.Vector v e, G.Vector v (Int,e)) => v e -> v (Int,e)
indexed xs = G.imap (,) xs
{-# INLINE indexed #-}

-- Strict pair holding a running (minimum, maximum); unpacked so the
-- fold below stays in unboxed doubles.
data MM = MM {-# UNPACK #-} !Double {-# UNPACK #-} !Double

-- | Compute the minimum and maximum of a vector in one pass.
-- The accumulator starts at (+Infinity, -Infinity), so an empty input
-- yields @(Infinity, -Infinity)@.
minMax :: (G.Vector v Double) => v Double -> (Double, Double)
minMax = fini . G.foldl' go (MM (1/0) (-1/0))
  where
    go (MM lo hi) k = MM (min lo k) (max hi k)
    fini (MM lo hi) = (lo, hi)
{-# INLINE minMax #-}

-- | Efficiently compute the next highest power of two for a
-- non-negative integer.  If the given value is already a power of
-- two, it is returned unchanged.  If negative, zero is returned.
nextHighestPowerOfTwo :: Int -> Int
nextHighestPowerOfTwo n
#if WORD_SIZE_IN_BITS == 64
  = 1 + _i32
#else
  = 1 + i16
#endif
  where
    -- Classic bit-smearing: OR n-1 with progressively larger shifts of
    -- itself until all bits below the leading one are set, then add 1.
    i0   = n - 1
    i1   = i0 .|. i0 `shiftR` 1
    i2   = i1 .|. i1 `shiftR` 2
    i4   = i2 .|. i2 `shiftR` 4
    i8   = i4 .|. i4 `shiftR` 8
    i16  = i8 .|. i8 `shiftR` 16
    _i32 = i16 .|. i16 `shiftR` 32
-- It could be implemented as
--
-- > nextHighestPowerOfTwo n = 1 + foldl' go (n-1) [1, 2, 4, 8, 16, 32]
-- >   where go m i = m .|. m `shiftR` i
--
-- But GHC do not inline foldl (probably because it's recursive) and
-- as result function walks list of boxed ints. Hand rolled version
-- uses unboxed arithmetic.

-- | Multiply a number by itself.
square :: Double -> Double
square x = x * x

-- | Simple for loop.  Counts from /start/ to /end/-1.
for :: Monad m => Int -> Int -> (Int -> m ()) -> m ()
for n0 !n f = loop n0
  where
    loop i | i == n    = return ()
           | otherwise = f i >> loop (i+1)
{-# INLINE for #-}

-- | Simple reverse-for loop.  Counts from /start/-1 to /end/ (which
-- must be less than /start/).
rfor :: Monad m => Int -> Int -> (Int -> m ()) -> m ()
rfor n0 !n f = loop n0
  where
    loop i | i == n    = return ()
           | otherwise = let i' = i-1 in f i' >> loop i'
{-# INLINE rfor #-}

-- | Apply a function to the element at the given index of a mutable
-- vector.  No bounds checking is performed — the caller must ensure
-- the index is valid.
unsafeModify :: M.MVector s Double -> Int -> (Double -> Double) -> ST s ()
unsafeModify v i f = do
  k <- M.unsafeRead v i
  M.unsafeWrite v i (f k)
{-# INLINE unsafeModify #-}
-------------------------------------------------------------------------------- /Statistics/Internal.hs: --------------------------------------------------------------------------------
-- |
-- Module    : Statistics.Internal
-- Copyright : (c) 2009 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
--
module Statistics.Internal (
    -- * Default definitions for Show
    defaultShow1
  , defaultShow2
  , defaultShow3
    -- * Default definitions for Read
  , defaultReadPrecM1
  , defaultReadPrecM2
  , defaultReadPrecM3
    -- * Reexports
  , Show(..)
  , Read(..)
  ) where

import Control.Applicative
import Control.Monad
import Text.Read


----------------------------------------------------------------
-- Default show implementations
----------------------------------------------------------------

-- Render @Con a@ with standard constructor-application precedence:
-- parenthesize when the enclosing context binds at least as tightly
-- as function application (precedence 11).
defaultShow1 :: (Show a) => String -> a -> Int -> ShowS
defaultShow1 con a n
  = showParen (n >= 11)
    ( showString con
    . showChar ' '
    . showsPrec 11 a
    )

-- Render @Con a b@; same precedence convention as 'defaultShow1'.
defaultShow2 :: (Show a, Show b) => String -> a -> b -> Int -> ShowS
defaultShow2 con a b n
  = showParen (n >= 11)
    ( showString con
    . showChar ' '
    . showsPrec 11 a
    . showChar ' '
    . showsPrec 11 b
    )

-- Render @Con a b c@; same precedence convention as 'defaultShow1'.
defaultShow3 :: (Show a, Show b, Show c)
             => String -> a -> b -> c -> Int -> ShowS
defaultShow3 con a b c n
  = showParen (n >= 11)
    ( showString con
    . showChar ' '
    . showsPrec 11 a
    . showChar ' '
    . showsPrec 11 b
    . showChar ' '
    . showsPrec 11 c
    )

----------------------------------------------------------------
-- Default read implementations
----------------------------------------------------------------

-- Parse @Con a@ and validate via the smart constructor @f@; the parse
-- fails ('empty') when validation returns 'Nothing'.
defaultReadPrecM1 :: (Read a) => String -> (a -> Maybe r) -> ReadPrec r
defaultReadPrecM1 con f = parens $ prec 10 $ do
  expect con
  a <- readPrec
  maybe empty return $ f a

defaultReadPrecM2 :: (Read a, Read b) => String -> (a -> b -> Maybe r) -> ReadPrec r
defaultReadPrecM2 con f = parens $ prec 10 $ do
  expect con
  a <- readPrec
  b <- readPrec
  maybe empty return $ f a b

defaultReadPrecM3 :: (Read a, Read b, Read c)
                  => String -> (a -> b -> c -> Maybe r) -> ReadPrec r
defaultReadPrecM3 con f = parens $ prec 10 $ do
  expect con
  a <- readPrec
  b <- readPrec
  c <- readPrec
  maybe empty return $ f a b c

-- Consume exactly the given identifier token; fail otherwise.
expect :: String -> ReadPrec ()
expect str = do
  Ident s <- lexP
  guard (s == str)
-------------------------------------------------------------------------------- /Statistics/Resampling/Bootstrap.hs: --------------------------------------------------------------------------------
-- |
-- Module    : Statistics.Resampling.Bootstrap
-- Copyright : (c) 2009, 2011 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- The bootstrap method for statistical inference.
11 | 12 | module Statistics.Resampling.Bootstrap 13 | ( bootstrapBCA 14 | , basicBootstrap 15 | -- * References 16 | -- $references 17 | ) where 18 | 19 | import Data.Vector.Generic ((!)) 20 | import qualified Data.Vector.Unboxed as U 21 | import qualified Data.Vector.Generic as G 22 | 23 | import Statistics.Distribution (cumulative, quantile) 24 | import Statistics.Distribution.Normal 25 | import Statistics.Resampling (Bootstrap(..), jackknife) 26 | import Statistics.Sample (mean) 27 | import Statistics.Types (Sample, CL, Estimate, ConfInt, estimateFromInterval, 28 | estimateFromErr, CL, significanceLevel) 29 | import Statistics.Function (gsort) 30 | 31 | import qualified Statistics.Resampling as R 32 | 33 | import Control.Parallel.Strategies (parMap, rdeepseq) 34 | 35 | data T = {-# UNPACK #-} !Double :< {-# UNPACK #-} !Double 36 | infixl 2 :< 37 | 38 | -- | Bias-corrected accelerated (BCA) bootstrap. This adjusts for both 39 | -- bias and skewness in the resampled distribution. 40 | -- 41 | -- BCA algorithm is described in ch. 5 of Davison, Hinkley "Confidence 42 | -- intervals" in section 5.3 "Percentile method" 43 | bootstrapBCA 44 | :: CL Double -- ^ Confidence level 45 | -> Sample -- ^ Full data sample 46 | -> [(R.Estimator, Bootstrap U.Vector Double)] 47 | -- ^ Estimates obtained from resampled data and estimator used for 48 | -- this. 49 | -> [Estimate ConfInt Double] 50 | bootstrapBCA confidenceLevel sample resampledData 51 | = parMap rdeepseq e resampledData 52 | where 53 | e (est, Bootstrap pt resample) 54 | | U.length sample == 1 || isInfinite bias = 55 | estimateFromErr pt (0,0) confidenceLevel 56 | | otherwise = 57 | estimateFromInterval pt (resample ! lo, resample ! 
hi) confidenceLevel 58 | where 59 | -- Quantile estimates for given CL 60 | lo = min (max (cumn a1) 0) (ni - 1) 61 | where a1 = bias + b1 / (1 - accel * b1) 62 | b1 = bias + z1 63 | hi = max (min (cumn a2) (ni - 1)) 0 64 | where a2 = bias + b2 / (1 - accel * b2) 65 | b2 = bias - z1 66 | -- Number of resamples 67 | ni = U.length resample 68 | n = fromIntegral ni 69 | -- Corrections 70 | z1 = quantile standard (significanceLevel confidenceLevel / 2) 71 | cumn = round . (*n) . cumulative standard 72 | bias = quantile standard (probN / n) 73 | where probN = fromIntegral . U.length . U.filter ( CL Double -- ^ Confidence vector 88 | -> Bootstrap v a -- ^ Estimate from full sample and vector of 89 | -- estimates obtained from resamples 90 | -> Estimate ConfInt a 91 | {-# INLINE basicBootstrap #-} 92 | basicBootstrap cl (Bootstrap e ests) 93 | = estimateFromInterval e (sorted ! lo, sorted ! hi) cl 94 | where 95 | sorted = gsort ests 96 | n = fromIntegral $ G.length ests 97 | c = n * (significanceLevel cl / 2) 98 | -- FIXME: can we have better estimates of quantiles in case when p 99 | -- is not multiple of 1/N 100 | -- 101 | -- FIXME: we could have undercoverage here 102 | lo = round c 103 | hi = truncate (n - c) 104 | 105 | -- $references 106 | -- 107 | -- * Davison, A.C; Hinkley, D.V. (1997) Bootstrap methods and their 108 | -- application. 109 | -------------------------------------------------------------------------------- /Statistics/Sample/Histogram.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts, BangPatterns, ScopedTypeVariables #-} 2 | 3 | -- | 4 | -- Module : Statistics.Sample.Histogram 5 | -- Copyright : (c) 2011 Bryan O'Sullivan 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- Functions for computing histograms of sample data. 

module Statistics.Sample.Histogram
    (
      histogram
    -- * Building blocks
    , histogram_
    , range
    ) where

import Control.Monad.ST
import Numeric.MathFunctions.Constants (m_epsilon,m_tiny)
import Statistics.Function (minMax)
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Generic.Mutable as GM

-- | /O(n)/ Compute a histogram over a data set.
--
-- The result consists of a pair of vectors:
--
-- * The lower bound of each interval.
--
-- * The number of samples within the interval.
--
-- Interval (bin) sizes are uniform, and the upper and lower bounds
-- are chosen automatically using the 'range' function.  To specify
-- these parameters directly, use the 'histogram_' function.
histogram :: (G.Vector v0 Double, G.Vector v1 Double, Num b, G.Vector v1 b) =>
             Int        -- ^ Number of bins (must be positive).
          -> v0 Double  -- ^ Sample data (cannot be empty).
          -> (v1 Double, v1 b)
histogram numBins xs = (G.generate numBins step, histogram_ numBins lo hi xs)
  where (lo,hi) = range numBins xs
        step i  = lo + d * fromIntegral i
        d       = (hi - lo) / fromIntegral numBins
{-# INLINE histogram #-}

-- | /O(n)/ Compute a histogram over a data set.
--
-- Interval (bin) sizes are uniform, based on the supplied upper
-- and lower bounds.
histogram_ :: forall b a v0 v1. (Num b, RealFrac a, G.Vector v0 a, G.Vector v1 b) =>
              Int
           -- ^ Number of bins.  This value must be positive.  A zero
           --   or negative value will cause an error.
           -> a
           -- ^ Lower bound on interval range.  Sample data less than
           --   this will cause an error.
           -> a
           -- ^ Upper bound on interval range.  This value must not be
           --   less than the lower bound.  Sample data that falls above
           --   the upper bound will cause an error.
           -> v0 a
           -- ^ Sample data.
           -> v1 b
histogram_ numBins lo hi xs0 = G.create (GM.replicate numBins 0 >>= bin xs0)
  where
    bin :: forall s. v0 a -> G.Mutable v1 s b -> ST s (G.Mutable v1 s b)
    bin xs bins = go 0
      where
        go i | i >= len = return bins
             | otherwise = do
                 let x = xs `G.unsafeIndex` i
                     b = truncate $ (x - lo) / d
                 -- GM.read/GM.write are bounds-checked, so out-of-range
                 -- samples raise an error here rather than corrupting memory.
                 write' bins b . (+1) =<< GM.read bins b
                 go (i+1)
        write' bins' b !e = GM.write bins' b e
        len = G.length xs
        -- Bin width is widened by one epsilon so that a sample exactly
        -- equal to 'hi' lands in the last bin instead of indexing one
        -- past the end.
        d = ((hi - lo) / fromIntegral numBins) * (1 + realToFrac m_epsilon)
{-# INLINE histogram_ #-}

-- | /O(n)/ Compute decent defaults for the lower and upper bounds of
-- a histogram, based on the desired number of bins and the range of
-- the sample data.
--
-- The upper and lower bounds used are @(lo-d, hi+d)@, where
--
-- @d = (maximum sample - minimum sample) / ((bins - 1) * 2)@
--
-- If all elements in the sample are the same and equal to @x@ range
-- is set to @(x - |x|\/10, x + |x|\/10)@.  And if @x@ is equal to 0 range
-- is set to @(-1,1)@.  This is needed to avoid creating histogram with
-- zero bin size.
range :: (G.Vector v Double) =>
         Int       -- ^ Number of bins (must be positive).
      -> v Double  -- ^ Sample data (cannot be empty).
      -> (Double, Double)
range numBins xs
  | numBins < 1 = error "Statistics.Histogram.range: invalid bin count"
  | G.null xs   = error "Statistics.Histogram.range: empty sample"
  | lo == hi    = case abs lo / 10 of
                    a | a < m_tiny -> (-1,1)
                      | otherwise  -> (lo - a, lo + a)
  | otherwise   = (lo-d, hi+d)
  where
    d | numBins == 1 = 0
      | otherwise    = (hi - lo) / ((fromIntegral numBins - 1) * 2)
    (lo,hi) = minMax xs
{-# INLINE range #-}
-------------------------------------------------------------------------------- /Statistics/Sample/Internal.hs: --------------------------------------------------------------------------------
{-# LANGUAGE FlexibleContexts #-}

-- |
-- Module    : Statistics.Sample.Internal
-- Copyright : (c) 2013 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- Internal functions for computing over samples.
module Statistics.Sample.Internal
    (
      robustSumVar
    , sum
    ) where

import Numeric.Sum (kbn, sumVector)
import Prelude hiding (sum)
import Statistics.Function (square)
import qualified Data.Vector.Generic as G

-- Sum of squared deviations from @m@, using the compensated 'sum' below.
robustSumVar :: (G.Vector v Double) => Double -> v Double -> Double
robustSumVar m = sum . G.map (square . subtract m)
{-# INLINE robustSumVar #-}

-- Kahan-Babuška-Neumaier compensated summation (via 'Numeric.Sum.kbn').
sum :: (G.Vector v Double) => v Double -> Double
sum = sumVector kbn
{-# INLINE sum #-}
-------------------------------------------------------------------------------- /Statistics/Sample/KernelDensity.hs: --------------------------------------------------------------------------------
{-# LANGUAGE BangPatterns, FlexibleContexts, UnboxedTuples #-}
-- |
-- Module    : Statistics.Sample.KernelDensity
-- Copyright : (c) 2011 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- Kernel density estimation.  This module provides a fast, robust,
-- non-parametric way to estimate the probability density function of
-- a sample.
--
-- This estimator does not use the commonly employed \"Gaussian rule
-- of thumb\".  As a result, it outperforms many plug-in methods on
-- multimodal samples with widely separated modes.

module Statistics.Sample.KernelDensity
    (
    -- * Estimation functions
      kde
    , kde_
    -- * References
    -- $references
    ) where

import Data.Default.Class
import Numeric.MathFunctions.Constants (m_sqrt_2_pi)
import Numeric.RootFinding (fromRoot, ridders, RiddersParam(..), Tolerance(..))
import Prelude hiding (const, min, max, sum)
import Statistics.Function (minMax, nextHighestPowerOfTwo)
import Statistics.Sample.Histogram (histogram_)
import Statistics.Sample.Internal (sum)
import Statistics.Transform (CD, dct, idct)
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector as V


-- | Gaussian kernel density estimator for one-dimensional data, using
-- the method of Botev et al.
--
-- The result is a pair of vectors, containing:
--
-- * The coordinates of each mesh point.  The mesh interval is chosen
--   to be 20% larger than the range of the sample.  (To specify the
--   mesh interval, use 'kde_'.)
--
-- * Density estimates at each mesh point.
kde :: (G.Vector v CD, G.Vector v Double, G.Vector v Int)
    => Int
    -- ^ The number of mesh points to use in the uniform discretization
    --   of the interval @(min,max)@.  If this value is not a power of
    --   two, then it is rounded up to the next power of two.
    -> v Double -> (v Double, v Double)
kde n0 xs = kde_ n0 (lo - range / 10) (hi + range / 10) xs
  where
    (lo,hi) = minMax xs
    range | G.length xs <= 1 = 1 -- Unreasonable guess
          | lo == hi         = 1 -- All elements are equal
          | otherwise        = hi - lo
{-# INLINABLE kde #-}
-- FIX: these pragmas previously read @SPECIAlIZE@ (lowercase 'l'),
-- which GHC reports as an unrecognised pragma and ignores, so the
-- specializations never fired.
{-# SPECIALIZE kde :: Int -> U.Vector Double -> (U.Vector Double, U.Vector Double) #-}
{-# SPECIALIZE kde :: Int -> V.Vector Double -> (V.Vector Double, V.Vector Double) #-}


-- | Gaussian kernel density estimator for one-dimensional data, using
-- the method of Botev et al.
--
-- The result is a pair of vectors, containing:
--
-- * The coordinates of each mesh point.
--
-- * Density estimates at each mesh point.
kde_ :: (G.Vector v CD, G.Vector v Double, G.Vector v Int)
     => Int
     -- ^ The number of mesh points to use in the uniform discretization
     --   of the interval @(min,max)@.  If this value is not a power of
     --   two, then it is rounded up to the next power of two.
     -> Double
     -- ^ Lower bound (@min@) of the mesh range.
     -> Double
     -- ^ Upper bound (@max@) of the mesh range.
     -> v Double
     -> (v Double, v Double)
kde_ n0 min max xs
  | G.null xs = error "Statistics.KernelDensity.kde: empty sample"
  | n0 <= 1   = error "Statistics.KernelDensity.kde: invalid number of points"
  | otherwise = (mesh, density)
  where
    -- Uniform mesh of 'ni' points covering [min,max].
    mesh = G.generate ni $ \z -> min + (d * fromIntegral z)
      where d = r / (n-1)
    -- Density: inverse DCT of the DCT coefficients damped by a Gaussian
    -- with the plug-in bandwidth t_star.
    density = G.map (/(2 * r)) . idct $ G.zipWith f a (G.enumFromTo 0 (n-1))
      where f b z = b * exp (sqr z * sqr pi * t_star * (-0.5))
    !n  = fromIntegral ni
    !ni = nextHighestPowerOfTwo n0
    !r  = max - min
    -- DCT of the normalized histogram of the sample.
    a   = dct . G.map (/ sum h) $ h
      where h = G.map (/ len) $ histogram_ ni min max xs
    !len = fromIntegral (G.length xs)
    -- Fixed-point bandwidth selection (Botev et al., 2010).  Falls back
    -- to the rule-of-thumb 0.28 * len**(-0.4) when Ridders' method fails
    -- to bracket a root.
    !t_star = fromRoot (0.28 * len ** (-0.4)) . ridders def{ riddersTol = AbsTol 1e-14 } (0,0.1)
            $ \x -> x - (len * (2 * sqrt pi) * go 6 (f 7 x)) ** (-0.4)
      where
        f q t = 2 * pi ** (q*2) * sum (G.zipWith g iv a2v)
          where g i a2 = i ** q * a2 * exp ((-i) * sqr pi * t)
                a2v = G.map (sqr . (*0.5)) $ G.tail a
                iv  = G.map sqr $ G.enumFromTo 1 (n-1)
        go s !h | s == 1    = h
                | otherwise = go (s-1) (f s time)
          where time  = (2 * const * k0 / len / h) ** (2 / (3 + 2 * s))
                const = (1 + 0.5 ** (s+0.5)) / 3
                k0    = U.product (G.enumFromThenTo 1 3 (2*s-1)) / m_sqrt_2_pi
    sqr x = x * x
{-# INLINABLE kde_ #-}
-- FIX: same @SPECIAlIZE@ -> @SPECIALIZE@ typo as above.
{-# SPECIALIZE kde_ :: Int -> Double -> Double -> U.Vector Double -> (U.Vector Double, U.Vector Double) #-}
{-# SPECIALIZE kde_ :: Int -> Double -> Double -> V.Vector Double -> (V.Vector Double, V.Vector Double) #-}


-- $references
--
-- Botev. Z.I., Grotowski J.F., Kroese D.P. (2010). Kernel density
-- estimation via diffusion. /Annals of Statistics/
-- 38(5):2916–2957.

-------------------------------------------------------------------------------- /Statistics/Sample/Normalize.hs: --------------------------------------------------------------------------------
{-# LANGUAGE FlexibleContexts #-}

-- |
-- Module    : Statistics.Sample.Normalize
-- Copyright : (c) 2017 Gregory W. Schwartz
-- License   : BSD3
--
-- Maintainer  : gsch@mail.med.upenn.edu
-- Stability   : experimental
-- Portability : portable
--
-- Functions for normalizing samples.

module Statistics.Sample.Normalize
    (
      standardize
    ) where

import Statistics.Sample
import qualified Data.Vector.Generic as G
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Storable as S

-- | /O(n)/ Normalize a sample using standard scores:
--
-- \[ z = \frac{x - \mu}{\sigma} \]
--
-- Where μ is sample mean and σ is standard deviation computed from
-- unbiased variance estimation.  If sample to small to compute σ or
-- it's equal to 0 @Nothing@ is returned.
standardize :: (G.Vector v Double) => v Double -> Maybe (v Double)
standardize xs
  | G.length xs >= 2 && sigma /= 0 = Just scaled
  | otherwise                      = Nothing
  where
    mu     = mean xs
    sigma  = stdDev xs
    -- z-score every element; only forced when both guards hold.
    scaled = G.map (\x -> (x - mu) / sigma) xs
{-# INLINABLE standardize #-}
{-# SPECIALIZE standardize :: V.Vector Double -> Maybe (V.Vector Double) #-}
{-# SPECIALIZE standardize :: U.Vector Double -> Maybe (U.Vector Double) #-}
{-# SPECIALIZE standardize :: S.Vector Double -> Maybe (S.Vector Double) #-}
-------------------------------------------------------------------------------- /Statistics/Test/ChiSquared.hs: --------------------------------------------------------------------------------
{-# LANGUAGE FlexibleContexts #-}
-- | Pearson's chi squared test.
module Statistics.Test.ChiSquared (
    chi2test
  , chi2testCont
  , module Statistics.Test.Types
  ) where

import Prelude hiding (sum)

import Statistics.Distribution
import Statistics.Distribution.ChiSquared
import Statistics.Function (square)
import Statistics.Sample.Internal (sum)
import Statistics.Test.Types
import Statistics.Types
import qualified Data.Vector as V
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U



-- | Generic form of Pearson chi squared tests for binned data.  Data
-- sample is supplied in form of tuples (observed quantity,
-- expected number of events).  Both must be positive.
--
-- This test should be used only if all bins have expected values of
-- at least 5.
chi2test :: (G.Vector v (Int,Double), G.Vector v Double)
         => Int            -- ^ Number of additional degrees of
                           --   freedom.  One degree of freedom
                           --   is due to the fact that there are
                           --   N observations in total and
                           --   accounted for automatically.
         -> v (Int,Double) -- ^ Observation and expectation.
         -> Maybe (Test ChiSquared)
chi2test ndf vec
  | ndf < 0   = error $ "Statistics.Test.ChiSquare.chi2test: negative NDF " ++ show ndf
  -- Returns Nothing when there are not enough bins for the requested
  -- degrees of freedom (n <= 0).
  | n > 0     = Just Test
      { testSignificance = mkPValue $ complCumulative d chi2
      , testStatistics   = chi2
      , testDistribution = chiSquared n
      }
  | otherwise = Nothing
  where
    n    = G.length vec - ndf - 1
    chi2 = sum $ G.map (\(o,e) -> square (fromIntegral o - e) / e) vec
    d    = chiSquared n
{-# INLINABLE chi2test #-}
{-# SPECIALIZE
    chi2test :: Int -> U.Vector (Int,Double) -> Maybe (Test ChiSquared) #-}
{-# SPECIALIZE
    chi2test :: Int -> V.Vector (Int,Double) -> Maybe (Test ChiSquared) #-}


-- | Chi squared test for data with normal errors.  Data is supplied in
-- form of pair (observation with error, and expectation).
chi2testCont
  :: (G.Vector v (Estimate NormalErr Double, Double), G.Vector v Double)
  => Int                                    -- ^ Number of additional
                                            --   degrees of freedom.
  -> v (Estimate NormalErr Double, Double)  -- ^ Observation and expectation.
  -> Maybe (Test ChiSquared)
chi2testCont ndf vec
  | ndf < 0   = error $ "Statistics.Test.ChiSquare.chi2testCont: negative NDF " ++ show ndf
  | n > 0     = Just Test
      { testSignificance = mkPValue $ complCumulative d chi2
      , testStatistics   = chi2
      , testDistribution = chiSquared n
      }
  | otherwise = Nothing
  where
    n    = G.length vec - ndf - 1
    -- NOTE(review): this divides by the error 's' once; a Pearson χ²
    -- with normal errors is Σ((o−e)/σ)², i.e. division by σ².  Verify
    -- whether 'NormalErr' stores the variance here — if it stores the
    -- standard error, this looks like a bug.
    chi2 = sum $ G.map (\(Estimate o (NormalErr s),e) -> square (o - e) / s) vec
    d    = chiSquared n
-------------------------------------------------------------------------------- /Statistics/Test/Internal.hs: --------------------------------------------------------------------------------
{-# LANGUAGE FlexibleContexts #-}
module Statistics.Test.Internal (
    rank
  , rankUnsorted
  , splitByTags
  ) where

import Data.Ord
import Data.Vector.Generic ((!))
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Generic.Mutable as M
import Statistics.Function


-- Private data type for unfolding
data Rank v a = Rank {
    rankCnt :: {-# UNPACK #-} !Int    -- Number of ranks to return
  , rankVal :: {-# UNPACK #-} !Double -- Rank to return
  , rankNum :: {-# UNPACK #-} !Double -- Current rank
  , rankVec :: v a                    -- Remaining vector
  }

-- | Calculate rank of every element of sample.  In case of ties ranks
-- are averaged.  Sample should be already sorted in ascending order.
--
-- Rank is index of element in the sample, numeration starts from 1.
-- In case of ties average of ranks of equal elements is assigned
-- to each
--
-- >>> import qualified Data.Vector.Unboxed as VU
-- >>> rank (==) (VU.fromList [10,20,30::Int])
-- [1.0,2.0,3.0]
--
-- >>> rank (==) (VU.fromList [10,10,10,30::Int])
-- [2.0,2.0,2.0,4.0]
rank :: (G.Vector v a)
     => (a -> a -> Bool)    -- ^ Equivalence relation
     -> v a                 -- ^ Vector to rank
     -> U.Vector Double
rank eq vec = G.unfoldr go (Rank 0 (-1) 1 vec)
  where
    -- rankCnt == 0: start a fresh group of equal elements.
    go (Rank 0 _ r v)
      | G.null v  = Nothing
      | otherwise =
          case G.length h of
            1 -> Just (r, Rank 0 0 (r+1) rest)
            -- A tie group of n elements: each gets the average of the
            -- ranks r .. r+n-1, i.e. (r*2 + (n-1)) / 2.
            n -> go Rank { rankCnt = n
                         , rankVal = 0.5 * (r*2 + fromIntegral (n-1))
                         , rankNum = r + fromIntegral n
                         , rankVec = rest
                         }
      where
        (h,rest) = G.span (eq $ G.head v) v
    -- Emit the shared rank for the remaining members of a tie group.
    go (Rank n val r v) = Just (val, Rank (n-1) val r v)
{-# INLINE rank #-}

-- | Compute rank of every element of vector.  Unlike rank it doesn't
-- require sample to be sorted.
rankUnsorted :: ( Ord a
                , G.Vector v a
                , G.Vector v Int
                , G.Vector v (Int, a)
                )
             => v a
             -> U.Vector Double
rankUnsorted xs = G.create $ do
    -- Put ranks into their original positions
    -- NOTE: backpermute will do wrong thing
    vec <- M.new n
    for 0 n $ \i ->
      M.unsafeWrite vec (index ! i) (ranks ! i)
    return vec
  where
    n = G.length xs
    -- Calculate ranks for sorted array
    ranks = rank (==) sorted
    -- Sort vector and retain original indices of elements
    (index, sorted)
      = G.unzip
      $ sortBy (comparing snd)
      $ indexed xs
{-# INLINE rankUnsorted #-}


-- | Split tagged vector
splitByTags :: (G.Vector v a, G.Vector v (Bool,a)) => v (Bool,a) -> (v a, v a)
splitByTags vs = (G.map snd a, G.map snd b)
  where
    (a,b) = G.unstablePartition fst vs
{-# INLINE splitByTags #-}
-------------------------------------------------------------------------------- /Statistics/Test/KruskalWallis.hs: --------------------------------------------------------------------------------
-- |
-- Module    : Statistics.Test.KruskalWallis
-- Copyright : (c) 2014 Danny Navarro
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
module Statistics.Test.KruskalWallis
  ( -- * Kruskal-Wallis test
    kruskalWallisTest
    -- ** Building blocks
  , kruskalWallisRank
  , kruskalWallis
  , module Statistics.Test.Types
  ) where

import Data.Ord (comparing)
import qualified Data.Vector.Unboxed as U
import Statistics.Function (sort, sortBy, square)
import Statistics.Distribution (complCumulative)
import Statistics.Distribution.ChiSquared (chiSquared)
import Statistics.Types
import Statistics.Test.Types
import Statistics.Test.Internal (rank)
import Statistics.Sample
import qualified Statistics.Sample.Internal as Sample(sum)


-- | Kruskal-Wallis ranking.
--
-- All values are replaced by the absolute rank in the combined samples.
--
-- The samples and values need not to be ordered but the values in the result
-- are ordered.  Assigned ranks (ties are given their average rank).
kruskalWallisRank :: (U.Unbox a, Ord a) => [U.Vector a] -> [U.Vector Double]
kruskalWallisRank samples = groupByTags
                          . sortBy (comparing fst)
                          . U.zip tags
                          $ rank (==) joinSample
  where
    -- Tag every value with the index of the sample it came from, pool
    -- everything, and sort by value so 'rank' sees a sorted vector.
    (tags,joinSample) = U.unzip
                      . sortBy (comparing snd)
                      $ foldMap (uncurry tagSample) $ zip [(1::Int)..] samples
    tagSample t = U.map (\x -> (t,x))

    -- Regroup the ranked values back into per-sample vectors by tag.
    groupByTags xs
      | U.null xs = []
      | otherwise = sort (U.map snd ys) : groupByTags zs
      where
        (ys,zs) = U.span ((==) (fst $ U.head xs) . fst) xs


-- | The Kruskal-Wallis Test.
--
-- In textbooks the output value is usually represented by 'K' or 'H'.  This
-- function already does the ranking.
kruskalWallis :: (U.Unbox a, Ord a) => [U.Vector a] -> Double
kruskalWallis samples = (nTot - 1) * numerator / denominator
  where
    -- Total number of elements in all samples
    nTot = fromIntegral $ sumWith rsamples U.length
    -- Average rank of all samples
    avgRank = (nTot + 1) / 2
    --
    numerator = sumWith rsamples $ \sample ->
        let n = fromIntegral $ U.length sample
        in  n * square (mean sample - avgRank)
    denominator = sumWith rsamples $ \sample ->
        Sample.sum $ U.map (\r -> square (r - avgRank)) sample

    rsamples = kruskalWallisRank samples


-- | Perform Kruskal-Wallis Test for the given samples and required
-- significance.  For additional information check 'kruskalWallis'.  This is just
-- a helper function.
--
-- It uses /Chi-Squared/ distribution for approximation as long as the sizes are
-- larger than 5.  Otherwise the test returns 'Nothing'.
kruskalWallisTest :: (Ord a, U.Unbox a) => [U.Vector a] -> Maybe (Test ())
kruskalWallisTest [] = Nothing
kruskalWallisTest samples
  -- We use chi-squared approximation here.  (>4) means every sample
  -- must have at least 5 elements.
  | all (>4) ns = Just Test { testSignificance = mkPValue $ complCumulative d k
                            , testStatistics   = k
                            , testDistribution = ()
                            }
  | otherwise   = Nothing
  where
    k  = kruskalWallis samples
    ns = map U.length samples
    d  = chiSquared (length ns - 1)

-- * Helper functions

-- Sum of @f@ applied to every sample.
sumWith :: Num a => [Sample] -> (Sample -> a) -> a
sumWith samples f = Prelude.sum $ fmap f samples
{-# INLINE sumWith #-}
-------------------------------------------------------------------------------- /Statistics/Test/Runs.hs: --------------------------------------------------------------------------------
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleContexts #-}
-- | Goodness of fit runs tests.
module Statistics.Test.Runs (
    runsTest
  , module Statistics.Test.Types
  ) where

import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import Numeric.SpecFunctions (choose)
import Prelude hiding (sum)

import Statistics.Sample.Internal (sum)
import Statistics.Test.Types
import Statistics.Types


-- | Goodness of fit test for binned data.  It uses only sign of
-- deviations of observations from their expectations.  Null
-- hypothesis is that all possible patterns of sign occurrences are
-- equiprobable.
--
-- It's asymptotically independent from chi-square test.  So their
So their 25 | -- results could be directly combined. 26 | runsTest :: (G.Vector v Bool) => v Bool -> Test () 27 | {-# INLINE runsTest #-} 28 | runsTest v 29 | = Test { testSignificance = mkPValue $ cumulativeProb n m r 30 | , testStatistics = fromIntegral r 31 | , testDistribution = () 32 | } 33 | where 34 | (n,m,r) = computeRuns v 35 | 36 | -- Compute number of positive elements, negative elements and runs 37 | computeRuns :: (G.Vector v Bool) => v Bool -> (Int,Int,Int) 38 | {-# INLINE computeRuns #-} 39 | computeRuns v 40 | = fini $ G.foldl' step (0,0,0,Nothing) v 41 | where 42 | step (!nP,!nM,!nR,!old) f = 43 | ( if f then nP+1 else nP 44 | , if f then nM else nM+1 45 | , if old == Just f then nR else nR+1 46 | , Just f 47 | ) 48 | fini (nP,nM,nR,_) = (nP,nM,nR) 49 | 50 | -- Compute denormalized probability of getting R runs given N positive 51 | -- and M negative elements 52 | denormProbability :: Int -> Int -> Int -> Double 53 | denormProbability n m r 54 | | even r = 2 * ((m-1) `choose` (s-1)) * ((n-1) `choose` (s-1)) 55 | | otherwise = ((m-1) `choose` (s-1)) * ((n-1) `choose` (s-2)) 56 | + ((m-1) `choose` (s-2)) * ((n-1) `choose` (s-1)) 57 | where 58 | s = r `quot` 2 59 | 60 | -- Probability of getting R<=R[observed] 61 | cumulativeProb :: Int -> Int -> Int -> Double 62 | cumulativeProb n m r 63 | = min 1 64 | $ sum (U.map (denormProbability n m) $ U.enumFromTo 1 r) 65 | / ((n+m) `choose` m) 66 | -------------------------------------------------------------------------------- /Statistics/Test/StudentT.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts, Rank2Types, ScopedTypeVariables #-} 2 | -- | Student's T-test is for assessing whether two samples have 3 | -- different means. This module contains several variations of 4 | -- T-test. These are parametric tests and assume that samples are 5 | -- normally distributed.
6 | module Statistics.Test.StudentT 7 | ( 8 | studentTTest 9 | , welchTTest 10 | , pairedTTest 11 | , module Statistics.Test.Types 12 | ) where 13 | 14 | import Statistics.Distribution hiding (mean) 15 | import Statistics.Distribution.StudentT 16 | import Statistics.Sample (mean, varianceUnbiased) 17 | import Statistics.Test.Types 18 | import Statistics.Types (mkPValue,PValue) 19 | import Statistics.Function (square) 20 | import qualified Data.Vector.Generic as G 21 | import qualified Data.Vector.Unboxed as U 22 | import qualified Data.Vector.Storable as S 23 | import qualified Data.Vector as V 24 | 25 | 26 | 27 | -- | Two-sample Student's t-test. It assumes that both samples are 28 | -- normally distributed and have same variance. Returns @Nothing@ if 29 | -- sample sizes are not sufficient. 30 | studentTTest :: (G.Vector v Double) 31 | => PositionTest -- ^ one- or two-tailed test 32 | -> v Double -- ^ Sample A 33 | -> v Double -- ^ Sample B 34 | -> Maybe (Test StudentT) 35 | studentTTest test sample1 sample2 36 | | G.length sample1 < 2 || G.length sample2 < 2 = Nothing 37 | | otherwise = Just Test 38 | { testSignificance = significance test t ndf 39 | , testStatistics = t 40 | , testDistribution = studentT ndf 41 | } 42 | where 43 | (t, ndf) = tStatistics True sample1 sample2 44 | {-# INLINABLE studentTTest #-} 45 | {-# SPECIALIZE studentTTest :: PositionTest -> U.Vector Double -> U.Vector Double -> Maybe (Test StudentT) #-} 46 | {-# SPECIALIZE studentTTest :: PositionTest -> S.Vector Double -> S.Vector Double -> Maybe (Test StudentT) #-} 47 | {-# SPECIALIZE studentTTest :: PositionTest -> V.Vector Double -> V.Vector Double -> Maybe (Test StudentT) #-} 48 | 49 | -- | Two-sample Welch's t-test. It assumes that both samples are 50 | -- normally distributed but doesn't assume that they have same 51 | -- variance. Returns @Nothing@ if sample sizes are not sufficient. 
52 | welchTTest :: (G.Vector v Double) 53 | => PositionTest -- ^ one- or two-tailed test 54 | -> v Double -- ^ Sample A 55 | -> v Double -- ^ Sample B 56 | -> Maybe (Test StudentT) 57 | welchTTest test sample1 sample2 58 | | G.length sample1 < 2 || G.length sample2 < 2 = Nothing 59 | | otherwise = Just Test 60 | { testSignificance = significance test t ndf 61 | , testStatistics = t 62 | , testDistribution = studentT ndf 63 | } 64 | where 65 | (t, ndf) = tStatistics False sample1 sample2 66 | {-# INLINABLE welchTTest #-} 67 | {-# SPECIALIZE welchTTest :: PositionTest -> U.Vector Double -> U.Vector Double -> Maybe (Test StudentT) #-} 68 | {-# SPECIALIZE welchTTest :: PositionTest -> S.Vector Double -> S.Vector Double -> Maybe (Test StudentT) #-} 69 | {-# SPECIALIZE welchTTest :: PositionTest -> V.Vector Double -> V.Vector Double -> Maybe (Test StudentT) #-} 70 | 71 | -- | Paired two-sample t-test. Two samples are paired in a 72 | -- within-subject design. Returns @Nothing@ if sample size is not 73 | -- sufficient. 74 | pairedTTest :: forall v. 
(G.Vector v (Double, Double)) 75 | => PositionTest -- ^ one- or two-tailed test 76 | -> v (Double, Double) -- ^ paired samples 77 | -> Maybe (Test StudentT) 78 | pairedTTest test sample 79 | | G.length sample < 2 = Nothing 80 | | otherwise = Just Test 81 | { testSignificance = significance test t ndf 82 | , testStatistics = t 83 | , testDistribution = studentT ndf 84 | } 85 | where 86 | (t, ndf) = tStatisticsPaired sample 87 | {-# INLINABLE pairedTTest #-} 88 | {-# SPECIALIZE pairedTTest :: PositionTest -> U.Vector (Double,Double) -> Maybe (Test StudentT) #-} 89 | {-# SPECIALIZE pairedTTest :: PositionTest -> V.Vector (Double,Double) -> Maybe (Test StudentT) #-} 90 | 91 | 92 | ------------------------------------------------------------------------------- 93 | 94 | significance :: PositionTest -- ^ one- or two-tailed 95 | -> Double -- ^ t statistics 96 | -> Double -- ^ degree of freedom 97 | -> PValue Double -- ^ p-value 98 | significance test t df = 99 | case test of 100 | -- Here we exploit symmetry of T-distribution and calculate small tail 101 | SamplesDiffer -> mkPValue $ 2 * tailArea (negate (abs t)) 102 | AGreater -> mkPValue $ tailArea (negate t) 103 | BGreater -> mkPValue $ tailArea t 104 | where 105 | tailArea = cumulative (studentT df) 106 | 107 | 108 | -- Calculate T statistics for two samples 109 | tStatistics :: (G.Vector v Double) 110 | => Bool -- variance equality 111 | -> v Double 112 | -> v Double 113 | -> (Double, Double) 114 | {-# INLINE tStatistics #-} 115 | tStatistics varequal sample1 sample2 = (t, ndf) 116 | where 117 | -- t-statistics 118 | t = (m1 - m2) / sqrt ( 119 | if varequal 120 | then ((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 + n2 - 2) * (1 / n1 + 1 / n2) 121 | else s1 / n1 + s2 / n2) 122 | 123 | -- degree of freedom 124 | ndf | varequal = n1 + n2 - 2 125 | | otherwise = square (s1 / n1 + s2 / n2) 126 | / (square s1 / (square n1 * (n1 - 1)) + square s2 / (square n2 * (n2 - 1))) 127 | -- statistics of two samples 128 | n1 = fromIntegral $ 
G.length sample1 129 | n2 = fromIntegral $ G.length sample2 130 | m1 = mean sample1 131 | m2 = mean sample2 132 | s1 = varianceUnbiased sample1 133 | s2 = varianceUnbiased sample2 134 | 135 | 136 | -- Calculate T-statistics for paired sample 137 | tStatisticsPaired :: (G.Vector v (Double, Double)) 138 | => v (Double, Double) 139 | -> (Double, Double) 140 | {-# INLINE tStatisticsPaired #-} 141 | tStatisticsPaired sample = (t, ndf) 142 | where 143 | -- t-statistics 144 | t = let d = U.map (uncurry (-)) $ G.convert sample 145 | sumd = U.sum d 146 | in sumd / sqrt ((n * U.sum (U.map square d) - square sumd) / ndf) 147 | -- degree of freedom 148 | ndf = n - 1 149 | n = fromIntegral $ G.length sample 150 | -------------------------------------------------------------------------------- /Statistics/Test/Types.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveFunctor, DeriveDataTypeable,DeriveGeneric #-} 2 | module Statistics.Test.Types ( 3 | Test(..) 4 | , isSignificant 5 | , TestResult(..) 6 | , significant 7 | , PositionTest(..) 8 | ) where 9 | 10 | import Control.DeepSeq (NFData(..)) 11 | import Control.Monad (liftM3) 12 | import Data.Aeson (FromJSON, ToJSON) 13 | import Data.Binary (Binary (..)) 14 | import Data.Data (Typeable, Data) 15 | import GHC.Generics 16 | 17 | import Statistics.Types (PValue) 18 | 19 | 20 | -- | Result of hypothesis testing 21 | data TestResult = Significant -- ^ Null hypothesis should be rejected 22 | | NotSignificant -- ^ Data is compatible with hypothesis 23 | deriving (Eq,Ord,Show,Typeable,Data,Generic) 24 | 25 | instance Binary TestResult where 26 | get = do 27 | sig <- get 28 | if sig then return Significant else return NotSignificant 29 | put = put . (== Significant) 30 | instance FromJSON TestResult 31 | instance ToJSON TestResult 32 | instance NFData TestResult 33 | 34 | 35 | 36 | -- | Result of statistical test. 
37 | data Test distr = Test 38 | { testSignificance :: !(PValue Double) 39 | -- ^ Probability of getting value of test statistics at least as 40 | -- extreme as measured. 41 | , testStatistics :: !Double 42 | -- ^ Statistic used for test. 43 | , testDistribution :: distr 44 | -- ^ Distribution of test statistics if null hypothesis is correct. 45 | } 46 | deriving (Eq,Ord,Show,Typeable,Data,Generic,Functor) 47 | 48 | instance (Binary d) => Binary (Test d) where 49 | get = liftM3 Test get get get 50 | put (Test sign stat distr) = put sign >> put stat >> put distr 51 | instance (FromJSON d) => FromJSON (Test d) 52 | instance (ToJSON d) => ToJSON (Test d) 53 | instance (NFData d) => NFData (Test d) where 54 | rnf (Test _ _ a) = rnf a 55 | 56 | -- | Check whether test is significant for given p-value. 57 | isSignificant :: PValue Double -> Test d -> TestResult 58 | isSignificant p t 59 | = significant $ p >= testSignificance t 60 | 61 | 62 | -- | Test type for test which compare positional (mean,median etc.) 63 | -- information of samples. 64 | data PositionTest 65 | = SamplesDiffer 66 | -- ^ Test whether samples differ in position. Null hypothesis is 67 | -- samples are not different 68 | | AGreater 69 | -- ^ Test if first sample (A) is larger than second (B). Null 70 | -- hypothesis is first sample is not larger than second. 71 | | BGreater 72 | -- ^ Test if second sample is larger than first. 
73 | deriving (Eq,Ord,Show,Typeable,Data,Generic) 74 | 75 | instance Binary PositionTest where 76 | get = do 77 | i <- get 78 | case (i :: Int) of 79 | 0 -> return SamplesDiffer 80 | 1 -> return AGreater 81 | 2 -> return BGreater 82 | _ -> fail "Invalid PositionTest" 83 | put SamplesDiffer = put (0 :: Int) 84 | put AGreater = put (1 :: Int) 85 | put BGreater = put (2 :: Int) 86 | instance FromJSON PositionTest 87 | instance ToJSON PositionTest 88 | instance NFData PositionTest 89 | 90 | -- | significant if parameter is 'True', not significant otherwise 91 | significant :: Bool -> TestResult 92 | significant True = Significant 93 | significant False = NotSignificant 94 | -------------------------------------------------------------------------------- /Statistics/Types/Internal.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Statistics.Types.Internal 3 | -- Copyright : (c) 2009 Bryan O'Sullivan 4 | -- License : BSD3 5 | -- 6 | -- Maintainer : bos@serpentine.com 7 | -- Stability : experimental 8 | -- Portability : portable 9 | -- 10 | -- Types for working with statistics. 11 | module Statistics.Types.Internal where 12 | 13 | 14 | import qualified Data.Vector.Unboxed as U (Vector) 15 | 16 | -- | Sample data. 17 | type Sample = U.Vector Double 18 | 19 | -- | Sample with weights. First element of sample is data, second is weight 20 | type WeightedSample = U.Vector (Double,Double) 21 | 22 | -- | Weights for affecting the importance of elements of a sample. 
23 | type Weights = U.Vector Double 24 | 25 | -------------------------------------------------------------------------------- /bench-papi/Bench.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Here we reexport definitions of tasty-bench 3 | module Bench 4 | ( whnf 5 | , nf 6 | , nfIO 7 | , whnfIO 8 | , bench 9 | , bgroup 10 | , defaultMain 11 | , benchIngredients 12 | ) where 13 | 14 | import Test.Tasty.PAPI 15 | -------------------------------------------------------------------------------- /bench-time/Bench.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Here we reexport definitions of tasty-bench 3 | module Bench 4 | ( whnf 5 | , nf 6 | , nfIO 7 | , whnfIO 8 | , bench 9 | , bgroup 10 | , defaultMain 11 | , benchIngredients 12 | ) where 13 | 14 | import Test.Tasty.Bench 15 | -------------------------------------------------------------------------------- /benchmark/bench.hs: -------------------------------------------------------------------------------- 1 | import Data.Complex 2 | import Statistics.Sample 3 | import Statistics.Transform 4 | import Statistics.Correlation 5 | import System.Random.MWC 6 | import qualified Data.Vector.Unboxed as VU 7 | import qualified Data.Vector.Unboxed.Mutable as MVU 8 | 9 | import Bench 10 | 11 | 12 | -- Test sample 13 | sample :: VU.Vector Double 14 | sample = VU.create $ do g <- create 15 | MVU.replicateM 10000 (uniform g) 16 | 17 | -- Weighted test sample 18 | sampleW :: VU.Vector (Double,Double) 19 | sampleW = VU.zip sample (VU.reverse sample) 20 | 21 | -- Complex vector for FFT tests 22 | sampleC :: VU.Vector (Complex Double) 23 | sampleC = VU.zipWith (:+) sample (VU.reverse sample) 24 | 25 | 26 | -- Simple benchmark for functions from Statistics.Sample 27 | main :: IO () 28 | main = 29 | defaultMain 30 | [ bgroup "sample" 31 | [ bench "range" $ nf (\x -> range x) sample 32 | -- Mean 33 | , bench "mean" $ nf (\x -> 
mean x) sample 34 | , bench "meanWeighted" $ nf (\x -> meanWeighted x) sampleW 35 | , bench "harmonicMean" $ nf (\x -> harmonicMean x) sample 36 | , bench "geometricMean" $ nf (\x -> geometricMean x) sample 37 | -- Variance 38 | , bench "variance" $ nf (\x -> variance x) sample 39 | , bench "varianceUnbiased" $ nf (\x -> varianceUnbiased x) sample 40 | , bench "varianceWeighted" $ nf (\x -> varianceWeighted x) sampleW 41 | -- Correlation 42 | , bench "pearson" $ nf pearson sampleW 43 | , bench "covariance" $ nf covariance sampleW 44 | , bench "correlation" $ nf correlation sampleW 45 | , bench "covariance2" $ nf (covariance2 sample) sample 46 | , bench "correlation2" $ nf (correlation2 sample) sample 47 | -- Other 48 | , bench "stdDev" $ nf (\x -> stdDev x) sample 49 | , bench "skewness" $ nf (\x -> skewness x) sample 50 | , bench "kurtosis" $ nf (\x -> kurtosis x) sample 51 | -- Central moments 52 | , bench "C.M. 2" $ nf (\x -> centralMoment 2 x) sample 53 | , bench "C.M. 3" $ nf (\x -> centralMoment 3 x) sample 54 | , bench "C.M. 4" $ nf (\x -> centralMoment 4 x) sample 55 | , bench "C.M. 
5" $ nf (\x -> centralMoment 5 x) sample 56 | ] 57 | , bgroup "FFT" 58 | [ bgroup "fft" 59 | [ bench (show n) $ whnf fft (VU.take n sampleC) | n <- fftSizes ] 60 | , bgroup "ifft" 61 | [ bench (show n) $ whnf ifft (VU.take n sampleC) | n <- fftSizes ] 62 | , bgroup "dct" 63 | [ bench (show n) $ whnf dct (VU.take n sample) | n <- fftSizes ] 64 | , bgroup "dct_" 65 | [ bench (show n) $ whnf dct_ (VU.take n sampleC) | n <- fftSizes ] 66 | , bgroup "idct" 67 | [ bench (show n) $ whnf idct (VU.take n sample) | n <- fftSizes ] 68 | , bgroup "idct_" 69 | [ bench (show n) $ whnf idct_ (VU.take n sampleC) | n <- fftSizes ] 70 | ] 71 | ] 72 | 73 | 74 | fftSizes :: [Int] 75 | fftSizes = [32,128,512,2048] 76 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: 2 | statistics.cabal 3 | 4 | -------------------------------------------------------------------------------- /dense-linear-algebra/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009, 2010 Bryan O'Sullivan 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /dense-linear-algebra/README.md: -------------------------------------------------------------------------------- 1 | # statistics-dense-linear-algebra 2 | 3 | [![Build Status](https://travis-ci.org/githubuser/statistics-dense-linear-algebra.png)](https://travis-ci.org/githubuser/statistics-dense-linear-algebra) 4 | 5 | The dense linear algebra functionality and related modules, extracted from `statistics-0.14.0.2` 6 | -------------------------------------------------------------------------------- /dense-linear-algebra/dense-linear-algebra.cabal: -------------------------------------------------------------------------------- 1 | name: dense-linear-algebra 2 | version: 0.1.0.0 3 | synopsis: Simple and incomplete pure haskell implementation of linear algebra 4 | description: 5 | This library is simply collection of linear-algebra related modules 6 | split from statistics library. 
7 | 8 | license: BSD2 9 | license-file: LICENSE 10 | author: Bryan O'Sullivan 11 | maintainer: Alexey Khudaykov 12 | copyright: 2018 Author name here 13 | category: Math, Statistics, Numeric 14 | build-type: Simple 15 | extra-source-files: README.md 16 | cabal-version: >=1.10 17 | 18 | tested-with: 19 | GHC ==7.4.2 20 | || ==7.6.3 21 | || ==7.8.4 22 | || ==7.10.3 23 | || ==8.0.2 24 | || ==8.2.2 25 | || ==8.4.4 26 | || ==8.6.5 27 | || ==8.8.1 28 | , GHCJS ==8.4 29 | 30 | library 31 | default-language: Haskell2010 32 | ghc-options: -Wall 33 | hs-source-dirs: src 34 | exposed-modules: Statistics.Matrix 35 | Statistics.Matrix.Algorithms 36 | Statistics.Matrix.Function 37 | Statistics.Matrix.Mutable 38 | Statistics.Matrix.Types 39 | build-depends: base >= 4.5 && < 5 40 | , deepseq >= 1.1.0.2 41 | , math-functions >= 0.1.7 42 | , primitive >= 0.3 43 | , vector >= 0.10 44 | , vector-algorithms >= 0.4 45 | , vector-th-unbox 46 | , vector-binary-instances >= 0.2.1 47 | 48 | test-suite spec 49 | default-language: Haskell2010 50 | ghc-options: -Wall 51 | type: exitcode-stdio-1.0 52 | hs-source-dirs: test 53 | main-is: LibSpec.hs 54 | build-depends: base 55 | , dense-linear-algebra 56 | , hspec 57 | , QuickCheck 58 | 59 | source-repository head 60 | type: git 61 | location: https://github.com/haskell/statistics 62 | -------------------------------------------------------------------------------- /dense-linear-algebra/src/Statistics/Matrix/Algorithms.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Statistics.Matrix.Algorithms 3 | -- Copyright : 2014 Bryan O'Sullivan 4 | -- License : BSD3 5 | -- 6 | -- Useful matrix functions. 
7 | 8 | module Statistics.Matrix.Algorithms 9 | ( 10 | qr 11 | ) where 12 | 13 | import Control.Applicative ((<$>), (<*>)) 14 | import Control.Monad.ST (ST, runST) 15 | import Prelude hiding (replicate) 16 | import Numeric.Sum (sumVector,kbn) 17 | import Statistics.Matrix (Matrix, column, dimension, for, norm) 18 | import qualified Statistics.Matrix.Mutable as M 19 | import qualified Data.Vector.Unboxed as U 20 | 21 | -- | /O(r*c)/ Compute the QR decomposition of a matrix. 22 | -- The result returned is the matrices (/q/,/r/). 23 | qr :: Matrix -> (Matrix, Matrix) 24 | qr mat = runST $ do 25 | let (m,n) = dimension mat 26 | r <- M.replicate n n 0 27 | a <- M.thaw mat 28 | for 0 n $ \j -> do 29 | cn <- M.immutably a $ \aa -> norm (column aa j) 30 | M.unsafeWrite r j j cn 31 | for 0 m $ \i -> M.unsafeModify a i j (/ cn) 32 | for (j+1) n $ \jj -> do 33 | p <- innerProduct a j jj 34 | M.unsafeWrite r j jj p 35 | for 0 m $ \i -> do 36 | aij <- M.unsafeRead a i j 37 | M.unsafeModify a i jj $ subtract (p * aij) 38 | (,) <$> M.unsafeFreeze a <*> M.unsafeFreeze r 39 | 40 | innerProduct :: M.MMatrix s -> Int -> Int -> ST s Double 41 | innerProduct mmat j k = M.immutably mmat $ \mat -> 42 | sumVector kbn $ U.zipWith (*) (column mat j) (column mat k) 43 | -------------------------------------------------------------------------------- /dense-linear-algebra/src/Statistics/Matrix/Function.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | -- | 3 | module Statistics.Matrix.Function where 4 | 5 | -- | Multiply a number by itself. 6 | square :: Double -> Double 7 | square x = x * x 8 | 9 | -- | Simple for loop. Counts from /start/ to /end/-1. 
10 | for :: Monad m => Int -> Int -> (Int -> m ()) -> m () 11 | for n0 !n f = loop n0 12 | where 13 | loop i | i == n = return () 14 | | otherwise = f i >> loop (i+1) 15 | {-# INLINE for #-} 16 | -------------------------------------------------------------------------------- /dense-linear-algebra/src/Statistics/Matrix/Mutable.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Statistics.Matrix.Mutable 3 | -- Copyright : (c) 2014 Bryan O'Sullivan 4 | -- License : BSD3 5 | -- 6 | -- Basic mutable matrix operations. 7 | 8 | module Statistics.Matrix.Mutable 9 | ( 10 | MMatrix(..) 11 | , MVector 12 | , replicate 13 | , thaw 14 | , bounds 15 | , unsafeNew 16 | , unsafeFreeze 17 | , unsafeRead 18 | , unsafeWrite 19 | , unsafeModify 20 | , immutably 21 | , unsafeBounds 22 | ) where 23 | 24 | import Control.Applicative ((<$>)) 25 | import Control.DeepSeq (NFData(..)) 26 | import Control.Monad.ST (ST) 27 | import Statistics.Matrix.Types (Matrix(..), MMatrix(..), MVector) 28 | import qualified Data.Vector.Unboxed as U 29 | import qualified Data.Vector.Unboxed.Mutable as M 30 | import Prelude hiding (replicate) 31 | 32 | replicate :: Int -> Int -> Double -> ST s (MMatrix s) 33 | replicate r c k = MMatrix r c <$> M.replicate (r*c) k 34 | 35 | thaw :: Matrix -> ST s (MMatrix s) 36 | thaw (Matrix r c v) = MMatrix r c <$> U.thaw v 37 | 38 | unsafeFreeze :: MMatrix s -> ST s Matrix 39 | unsafeFreeze (MMatrix r c mv) = Matrix r c <$> U.unsafeFreeze mv 40 | 41 | -- | Allocate new matrix. Matrix content is not initialized hence unsafe. 
42 | unsafeNew :: Int -- ^ Number of rows 43 | -> Int -- ^ Number of columns 44 | -> ST s (MMatrix s) 45 | unsafeNew r c 46 | | r < 0 = error "Statistics.Matrix.Mutable.unsafeNew: negative number of rows" 47 | | c < 0 = error "Statistics.Matrix.Mutable.unsafeNew: negative number of columns" 48 | | otherwise = do 49 | vec <- M.new (r*c) 50 | return $ MMatrix r c vec 51 | 52 | unsafeRead :: MMatrix s -> Int -> Int -> ST s Double 53 | unsafeRead mat r c = unsafeBounds mat r c M.unsafeRead 54 | {-# INLINE unsafeRead #-} 55 | 56 | unsafeWrite :: MMatrix s -> Int -> Int -> Double -> ST s () 57 | unsafeWrite mat row col k = unsafeBounds mat row col $ \v i -> 58 | M.unsafeWrite v i k 59 | {-# INLINE unsafeWrite #-} 60 | 61 | unsafeModify :: MMatrix s -> Int -> Int -> (Double -> Double) -> ST s () 62 | unsafeModify mat row col f = unsafeBounds mat row col $ \v i -> do 63 | k <- M.unsafeRead v i 64 | M.unsafeWrite v i (f k) 65 | {-# INLINE unsafeModify #-} 66 | 67 | -- | Given row and column numbers, calculate the offset into the flat 68 | -- row-major vector, checking that the indices are in range. 69 | bounds :: MMatrix s -> Int -> Int -> (MVector s -> Int -> r) -> r 70 | bounds (MMatrix rs cs mv) r c k 71 | | r < 0 || r >= rs = error "row out of bounds" 72 | | c < 0 || c >= cs = error "column out of bounds" 73 | | otherwise = k mv $! r * cs + c 74 | {-# INLINE bounds #-} 75 | 76 | -- | Given row and column numbers, calculate the offset into the flat 77 | -- row-major vector, without checking. 78 | unsafeBounds :: MMatrix s -> Int -> Int -> (MVector s -> Int -> r) -> r 79 | unsafeBounds (MMatrix _ cs mv) r c k = k mv $!
r * cs + c 80 | {-# INLINE unsafeBounds #-} 81 | 82 | immutably :: NFData a => MMatrix s -> (Matrix -> a) -> ST s a 83 | immutably mmat f = do 84 | k <- f <$> unsafeFreeze mmat 85 | rnf k `seq` return k 86 | {-# INLINE immutably #-} 87 | -------------------------------------------------------------------------------- /dense-linear-algebra/src/Statistics/Matrix/Types.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Statistics.Matrix.Types 3 | -- Copyright : 2014 Bryan O'Sullivan 4 | -- License : BSD3 5 | -- 6 | -- Basic matrix operations. 7 | -- 8 | -- There isn't a widely used matrix package for Haskell yet, so 9 | -- we implement the necessary minimum here. 10 | 11 | module Statistics.Matrix.Types 12 | ( 13 | Vector 14 | , MVector 15 | , Matrix(..) 16 | , MMatrix(..) 17 | , debug 18 | ) where 19 | 20 | import Data.Char (isSpace) 21 | import Numeric (showFFloat) 22 | import qualified Data.Vector.Unboxed as U 23 | import qualified Data.Vector.Unboxed.Mutable as M 24 | 25 | type Vector = U.Vector Double 26 | type MVector s = M.MVector s Double 27 | 28 | -- | Two-dimensional matrix, stored in row-major order. 29 | data Matrix = Matrix { 30 | rows :: {-# UNPACK #-} !Int -- ^ Rows of matrix. 31 | , cols :: {-# UNPACK #-} !Int -- ^ Columns of matrix. 32 | , _vector :: !Vector -- ^ Matrix data. 33 | } deriving (Eq) 34 | 35 | -- | Two-dimensional mutable matrix, stored in row-major order. 36 | data MMatrix s = MMatrix 37 | {-# UNPACK #-} !Int 38 | {-# UNPACK #-} !Int 39 | !(MVector s) 40 | 41 | -- The Show instance is useful only for debugging. 42 | instance Show Matrix where 43 | show = debug 44 | 45 | debug :: Matrix -> String 46 | debug (Matrix r c vs) = unlines $ zipWith (++) (hdr0 : repeat hdr) rrows 47 | where 48 | rrows = map (cleanEnd . unwords) . 
split $ zipWith (++) ldone tdone 49 | hdr0 = show (r,c) ++ " " 50 | hdr = replicate (length hdr0) ' ' 51 | pad plus k xs = replicate (k - length xs) ' ' `plus` xs 52 | ldone = map (pad (++) (longest lstr)) lstr 53 | tdone = map (pad (flip (++)) (longest tstr)) tstr 54 | (lstr, tstr) = unzip . map (break (=='.') . render) . U.toList $ vs 55 | longest = maximum . map length 56 | render k = reverse . dropWhile (=='.') . dropWhile (=='0') . reverse . 57 | showFFloat (Just 4) k $ "" 58 | split [] = [] 59 | split xs = i : split rest where (i, rest) = splitAt c xs 60 | cleanEnd = reverse . dropWhile isSpace . reverse 61 | -------------------------------------------------------------------------------- /dense-linear-algebra/test/LibSpec.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Test.Hspec 4 | import Test.Hspec.QuickCheck 5 | 6 | 7 | main :: IO () 8 | main = hspec spec 9 | 10 | spec :: Spec 11 | spec = 12 | describe "Lib" $ do 13 | it "works" $ do 14 | True `shouldBe` True 15 | -- prop "ourAdd is commutative" $ \x y -> 16 | -- ourAdd x y `shouldBe` ourAdd y x 17 | -------------------------------------------------------------------------------- /dense-linear-algebra/test/Spec.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -F -pgmF hspec-discover #-} 2 | -------------------------------------------------------------------------------- /examples/kde/KDE.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | import Control.Applicative ((<$>)) 4 | import Statistics.Sample.KernelDensity (kde) 5 | import Text.Hastache (MuType(..), defaultConfig, hastacheFile) 6 | import Text.Hastache.Context (mkStrContext) 7 | import qualified Data.Attoparsec.ByteString as B 8 | import qualified Data.Attoparsec.ByteString.Char8 as A 9 | import qualified Data.ByteString as B 10 | import 
qualified Data.ByteString.Lazy as L 11 | import qualified Data.Vector.Unboxed as U 12 | import qualified Data.Text.Lazy.IO as TL 13 | 14 | csv = do 15 | B.takeTill A.isEndOfLine 16 | (A.double `A.sepBy` A.char ',') `A.sepBy` A.endOfLine 17 | 18 | main = do 19 | waits <- (either error (U.fromList . map last . filter (not.null)) . 20 | A.parseOnly csv) <$> B.readFile "data/faithful.csv" 21 | let xs = map (\(a,b) -> [a,b]) . U.toList . uncurry U.zip . kde 64 $ waits 22 | context "data" = MuVariable . show $ xs 23 | s <- hastacheFile defaultConfig "kde.tpl" (mkStrContext context) 24 | TL.writeFile "kde.html" s 25 | -------------------------------------------------------------------------------- /examples/kde/data/faithful.csv: -------------------------------------------------------------------------------- 1 | eruption,wait 2 | 3.6,79 3 | 1.8,54 4 | 3.333,74 5 | 2.283,62 6 | 4.533,85 7 | 2.883,55 8 | 4.7,88 9 | 3.6,85 10 | 1.95,51 11 | 4.35,85 12 | 1.833,54 13 | 3.917,84 14 | 4.2,78 15 | 1.75,47 16 | 4.7,83 17 | 2.167,52 18 | 1.75,62 19 | 4.8,84 20 | 1.6,52 21 | 4.25,79 22 | 1.8,51 23 | 1.75,47 24 | 3.45,78 25 | 3.067,69 26 | 4.533,74 27 | 3.6,83 28 | 1.967,55 29 | 4.083,76 30 | 3.85,78 31 | 4.433,79 32 | 4.3,73 33 | 4.467,77 34 | 3.367,66 35 | 4.033,80 36 | 3.833,74 37 | 2.017,52 38 | 1.867,48 39 | 4.833,80 40 | 1.833,59 41 | 4.783,90 42 | 4.35,80 43 | 1.883,58 44 | 4.567,84 45 | 1.75,58 46 | 4.533,73 47 | 3.317,83 48 | 3.833,64 49 | 2.1,53 50 | 4.633,82 51 | 2,59 52 | 4.8,75 53 | 4.716,90 54 | 1.833,54 55 | 4.833,80 56 | 1.733,54 57 | 4.883,83 58 | 3.717,71 59 | 1.667,64 60 | 4.567,77 61 | 4.317,81 62 | 2.233,59 63 | 4.5,84 64 | 1.75,48 65 | 4.8,82 66 | 1.817,60 67 | 4.4,92 68 | 4.167,78 69 | 4.7,78 70 | 2.067,65 71 | 4.7,73 72 | 4.033,82 73 | 1.967,56 74 | 4.5,79 75 | 4,71 76 | 1.983,62 77 | 5.067,76 78 | 2.017,60 79 | 4.567,78 80 | 3.883,76 81 | 3.6,83 82 | 4.133,75 83 | 4.333,82 84 | 4.1,70 85 | 2.633,65 86 | 4.067,73 87 | 4.933,88 88 | 3.95,76 89 | 4.517,80 90 | 
2.167,48 91 | 4,86 92 | 2.2,60 93 | 4.333,90 94 | 1.867,50 95 | 4.817,78 96 | 1.833,63 97 | 4.3,72 98 | 4.667,84 99 | 3.75,75 100 | 1.867,51 101 | 4.9,82 102 | 2.483,62 103 | 4.367,88 104 | 2.1,49 105 | 4.5,83 106 | 4.05,81 107 | 1.867,47 108 | 4.7,84 109 | 1.783,52 110 | 4.85,86 111 | 3.683,81 112 | 4.733,75 113 | 2.3,59 114 | 4.9,89 115 | 4.417,79 116 | 1.7,59 117 | 4.633,81 118 | 2.317,50 119 | 4.6,85 120 | 1.817,59 121 | 4.417,87 122 | 2.617,53 123 | 4.067,69 124 | 4.25,77 125 | 1.967,56 126 | 4.6,88 127 | 3.767,81 128 | 1.917,45 129 | 4.5,82 130 | 2.267,55 131 | 4.65,90 132 | 1.867,45 133 | 4.167,83 134 | 2.8,56 135 | 4.333,89 136 | 1.833,46 137 | 4.383,82 138 | 1.883,51 139 | 4.933,86 140 | 2.033,53 141 | 3.733,79 142 | 4.233,81 143 | 2.233,60 144 | 4.533,82 145 | 4.817,77 146 | 4.333,76 147 | 1.983,59 148 | 4.633,80 149 | 2.017,49 150 | 5.1,96 151 | 1.8,53 152 | 5.033,77 153 | 4,77 154 | 2.4,65 155 | 4.6,81 156 | 3.567,71 157 | 4,70 158 | 4.5,81 159 | 4.083,93 160 | 1.8,53 161 | 3.967,89 162 | 2.2,45 163 | 4.15,86 164 | 2,58 165 | 3.833,78 166 | 3.5,66 167 | 4.583,76 168 | 2.367,63 169 | 5,88 170 | 1.933,52 171 | 4.617,93 172 | 1.917,49 173 | 2.083,57 174 | 4.583,77 175 | 3.333,68 176 | 4.167,81 177 | 4.333,81 178 | 4.5,73 179 | 2.417,50 180 | 4,85 181 | 4.167,74 182 | 1.883,55 183 | 4.583,77 184 | 4.25,83 185 | 3.767,83 186 | 2.033,51 187 | 4.433,78 188 | 4.083,84 189 | 1.833,46 190 | 4.417,83 191 | 2.183,55 192 | 4.8,81 193 | 1.833,57 194 | 4.8,76 195 | 4.1,84 196 | 3.966,77 197 | 4.233,81 198 | 3.5,87 199 | 4.366,77 200 | 2.25,51 201 | 4.667,78 202 | 2.1,60 203 | 4.35,82 204 | 4.133,91 205 | 1.867,53 206 | 4.6,78 207 | 1.783,46 208 | 4.367,77 209 | 3.85,84 210 | 1.933,49 211 | 4.5,83 212 | 2.383,71 213 | 4.7,80 214 | 1.867,49 215 | 3.833,75 216 | 3.417,64 217 | 4.233,76 218 | 2.4,53 219 | 4.8,94 220 | 2,55 221 | 4.15,76 222 | 1.867,50 223 | 4.267,82 224 | 1.75,54 225 | 4.483,75 226 | 4,78 227 | 4.117,79 228 | 4.083,78 229 | 4.267,78 230 | 3.917,70 231 | 
4.55,79 232 | 4.083,70 233 | 2.417,54 234 | 4.183,86 235 | 2.217,50 236 | 4.45,90 237 | 1.883,54 238 | 1.85,54 239 | 4.283,77 240 | 3.95,79 241 | 2.333,64 242 | 4.15,75 243 | 2.35,47 244 | 4.933,86 245 | 2.9,63 246 | 4.583,85 247 | 3.833,82 248 | 2.083,57 249 | 4.367,82 250 | 2.133,67 251 | 4.35,74 252 | 2.2,54 253 | 4.45,83 254 | 3.567,73 255 | 4.5,73 256 | 4.15,88 257 | 3.817,80 258 | 3.917,71 259 | 4.45,83 260 | 2,56 261 | 4.283,79 262 | 4.767,78 263 | 4.533,84 264 | 1.85,58 265 | 4.25,83 266 | 1.983,43 267 | 2.25,60 268 | 4.75,75 269 | 4.117,81 270 | 2.15,46 271 | 4.417,90 272 | 1.817,46 273 | 4.467,74 274 | -------------------------------------------------------------------------------- /examples/kde/kde.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Kernel density 6 | 7 | 8 | 9 | 10 | 11 |

Kernel density

12 | 13 |
14 | 15 |

This is a 64-point kernel density estimate 16 | of wait 17 | times between eruptions of 18 | the Old 19 | Faithful geyser.

20 | 21 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /examples/kde/kde.tpl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Kernel density 6 | 7 | 8 | 9 | 10 | 11 |

Kernel density

12 | 13 |
14 | 15 |

This is a 64-point kernel density estimate 16 | of wait 17 | times between eruptions of 18 | the Old 19 | Faithful geyser.

20 | 21 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /tests/Tests/ApproxEq.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts, FlexibleInstances, TypeFamilies #-} 2 | 3 | module Tests.ApproxEq 4 | ( 5 | ApproxEq(..) 6 | ) where 7 | 8 | import Data.Complex (Complex(..), realPart) 9 | import Data.List (intersperse) 10 | import Data.Maybe (catMaybes) 11 | import Numeric.MathFunctions.Constants (m_epsilon) 12 | import Statistics.Matrix hiding (map, toList) 13 | import Test.QuickCheck 14 | import qualified Data.Vector as V 15 | import qualified Data.Vector.Generic as G 16 | import qualified Data.Vector.Unboxed as U 17 | import qualified Statistics.Matrix as M 18 | 19 | class (Eq a, Show a) => ApproxEq a where 20 | type Bounds a 21 | 22 | eq :: Bounds a -> a -> a -> Bool 23 | eql :: Bounds a -> a -> a -> Property 24 | eql eps a b = counterexample (show a ++ " /=~ " ++ show b) (eq eps a b) 25 | 26 | (=~) :: a -> a -> Bool 27 | 28 | (==~) :: a -> a -> Property 29 | a ==~ b = counterexample (show a ++ " /=~ " ++ show b) (a =~ b) 30 | 31 | instance ApproxEq Double where 32 | type Bounds Double = Double 33 | 34 | eq eps a b 35 | | a == 0 && b == 0 = True 36 | | otherwise = abs (a - b) <= eps * max (abs a) (abs b) 37 | (=~) = eq m_epsilon 38 | 39 | instance ApproxEq (Complex Double) where 40 | type Bounds (Complex Double) = Double 41 | 42 | eq eps a@(ar :+ ai) b@(br :+ bi) 43 | | a == 0 && b == 0 = True 44 | | otherwise = abs (ar - br) <= eps * d 45 | && abs (ai - bi) <= eps * d 46 | where 47 | d = max (realPart $ abs a) (realPart $ abs b) 48 | 49 | (=~) = eq m_epsilon 50 | 51 | instance ApproxEq [Double] where 52 | type Bounds [Double] = Double 53 | 54 | eq eps (x:xs) (y:ys) = eq eps x y && eq eps xs ys 55 | eq _ [] [] = True 56 | eq _ _ _ = False 57 | 58 | eql = eqll length id id 59 | (=~) = eq m_epsilon 60 | (==~) = eql m_epsilon 61 | 62 | instance 
ApproxEq (U.Vector Double) where 63 | type Bounds (U.Vector Double) = Double 64 | 65 | eq = eqv 66 | (=~) = eq m_epsilon 67 | eql = eqlv 68 | (==~) = eqlv m_epsilon 69 | 70 | instance ApproxEq (V.Vector Double) where 71 | type Bounds (V.Vector Double) = Double 72 | 73 | eq = eqv 74 | (=~) = eq m_epsilon 75 | eql = eqlv 76 | (==~) = eqlv m_epsilon 77 | 78 | instance ApproxEq Matrix where 79 | type Bounds Matrix = Double 80 | 81 | eq eps (Matrix r1 c1 v1) (Matrix r2 c2 v2) = 82 | (r1,c1) == (r2,c2) && eq eps v1 v2 83 | (=~) = eq m_epsilon 84 | eql eps a b = eqll dimension M.toList (`quotRem` cols a) eps a b 85 | (==~) = eql m_epsilon 86 | 87 | eqv :: (ApproxEq a, G.Vector v Bool, G.Vector v a) => 88 | Bounds a -> v a -> v a -> Bool 89 | eqv eps a b = G.length a == G.length b && G.and (G.zipWith (eq eps) a b) 90 | 91 | eqlv :: (ApproxEq [a], G.Vector v a) => Bounds [a] -> v a -> v a -> Property 92 | eqlv eps a b = eql eps (G.toList a) (G.toList b) 93 | 94 | eqll :: (ApproxEq l, ApproxEq a, Show c, Show d, Eq d, Bounds l ~ Bounds a) => 95 | (l -> d) -> (l -> [a]) -> (Int -> c) -> Bounds l -> l -> l -> Property 96 | eqll dim toList coord eps a b = counterexample fancy $ eq eps a b 97 | where 98 | fancy 99 | | la /= lb = "size mismatch: " ++ show la ++ " /= " ++ show lb 100 | | length summary < length full = summary 101 | | otherwise = full 102 | summary = concat . intersperse ", " . 
catMaybes $ 103 | zipWith3 whee (map coord [(0::Int)..]) xs ys 104 | full | '\n' `elem` sa = sa ++ " /=~\n" ++ sb 105 | | otherwise = sa ++ " /=~" ++ sb 106 | (sa, sb) = (show a, show b) 107 | (xs, ys) = (toList a, toList b) 108 | (la, lb) = (dim a, dim b) 109 | whee i x y | eq eps x y = Nothing 110 | | otherwise = Just $ show i ++ ": " ++ show x ++ " /=~ " ++ show y 111 | -------------------------------------------------------------------------------- /tests/Tests/Correlation.hs: -------------------------------------------------------------------------------- 1 | {-#LANGUAGE BangPatterns #-} 2 | 3 | module Tests.Correlation 4 | ( tests ) where 5 | 6 | import Control.Arrow (Arrow(..)) 7 | import qualified Data.Vector as V 8 | import Data.Maybe 9 | import Statistics.Correlation 10 | import Statistics.Correlation.Kendall 11 | import Test.Tasty 12 | import Test.Tasty.QuickCheck hiding (sample) 13 | import Test.Tasty.HUnit 14 | 15 | import Tests.ApproxEq 16 | 17 | ---------------------------------------------------------------- 18 | -- Tests list 19 | ---------------------------------------------------------------- 20 | 21 | tests :: TestTree 22 | tests = testGroup "Correlation" 23 | [ testProperty "Pearson correlation" testPearson 24 | , testProperty "Spearman correlation is scale invariant" testSpearmanScale 25 | , testProperty "Spearman correlation, nonlinear" testSpearmanNonlinear 26 | , testProperty "Kendall test -- general" testKendall 27 | , testCase "Kendall test -- special cases" testKendallSpecial 28 | ] 29 | 30 | 31 | ---------------------------------------------------------------- 32 | -- Pearson's correlation 33 | ---------------------------------------------------------------- 34 | 35 | testPearson :: [(Double,Double)] -> Property 36 | testPearson sample 37 | = (length sample > 1 && isJust exact) ==> (case exact of 38 | Just e -> e ~= fast 39 | Nothing -> property False 40 | ) 41 | where 42 | (~=) = eql 1e-12 43 | exact = exactPearson $ map (realToFrac 
*** realToFrac) sample 44 | fast = pearson $ V.fromList sample 45 | 46 | exactPearson :: [(Rational,Rational)] -> Maybe Double 47 | exactPearson sample 48 | | varX == 0 || varY == 0 = Nothing 49 | | otherwise = Just $ realToFrac cov / sqrt (realToFrac (varX * varY)) 50 | where 51 | (xs,ys) = unzip sample 52 | n = fromIntegral $ length sample 53 | -- Mean 54 | muX = sum xs / n 55 | muY = sum ys / n 56 | -- Mean of squares 57 | muX2 = sum (map (\x->x*x) xs) / n 58 | muY2 = sum (map (\x->x*x) ys) / n 59 | -- Covariance 60 | cov = sum (zipWith (*) [x - muX | x<-xs] [y - muY | y<-ys]) / n 61 | varX = muX2 - muX*muX 62 | varY = muY2 - muY*muY 63 | 64 | ---------------------------------------------------------------- 65 | -- Spearman's correlation 66 | ---------------------------------------------------------------- 67 | 68 | -- Test that Spearman correlation is scale invariant 69 | testSpearmanScale :: [(Double,Double)] -> Double -> Property 70 | testSpearmanScale xs a 71 | = and [ length xs > 1 -- Enough to calculate underflow 72 | , a /= 0 73 | , not (isNaN c1) 74 | , not (isNaN c2) 75 | , not (isNaN c3) 76 | , not (isNaN c4) 77 | ] 78 | ==> ( counterexample (show xs2) 79 | $ counterexample (show xs3) 80 | $ counterexample (show xs4) 81 | $ counterexample (show (c1,c2,c3,c4)) 82 | $ and [ c1 == c4 83 | , c1 == signum a * c2 84 | , c1 == signum a * c3 85 | ] 86 | ) 87 | where 88 | xs2 = map ((*a) *** id ) xs 89 | xs3 = map (id *** (*a)) xs 90 | xs4 = map ((*a) *** (*a)) xs 91 | c1 = spearman $ V.fromList xs 92 | c2 = spearman $ V.fromList xs2 93 | c3 = spearman $ V.fromList xs3 94 | c4 = spearman $ V.fromList xs4 95 | 96 | -- Test that Spearman correlation allows to transform sample with 97 | testSpearmanNonlinear :: [(Double,Double)] -> Property 98 | testSpearmanNonlinear sample0 99 | = and [ length sample0 > 1 100 | , not (isNaN c1) 101 | , not (isNaN c2) 102 | , not (isNaN c3) 103 | , not (isNaN c4) 104 | ] 105 | ==> ( counterexample ("S0 = " ++ show sample0) 106 | $ 
counterexample ("S1 = " ++ show sample1) 107 | $ counterexample ("S2 = " ++ show sample2) 108 | $ counterexample ("S3 = " ++ show sample3) 109 | $ counterexample ("S4 = " ++ show sample4) 110 | $ counterexample (show (c1,c2,c3,c4)) 111 | $ and [ c1 == c2 112 | , c1 == c3 113 | , c1 == c4 114 | ] 115 | ) 116 | where 117 | -- We need to stretch sample into [-10 .. 10] range to avoid 118 | -- problems with under/overflows etc. 119 | stretch xs 120 | | a == b = xs 121 | | otherwise = [ ((x - a)/(b - a) - 0.5) * 20 | x <- xs ] 122 | where 123 | a = minimum xs 124 | b = maximum xs 125 | sample1 = uncurry zip $ (stretch *** stretch) $ unzip sample0 126 | sample2 = map (exp *** id ) sample1 127 | sample3 = map (id *** exp) sample1 128 | sample4 = map (exp *** exp) sample1 129 | c1 = spearman $ V.fromList sample1 130 | c2 = spearman $ V.fromList sample2 131 | c3 = spearman $ V.fromList sample3 132 | c4 = spearman $ V.fromList sample4 133 | 134 | 135 | ---------------------------------------------------------------- 136 | -- Kendall's correlation 137 | ---------------------------------------------------------------- 138 | 139 | testKendall :: [(Double, Double)] -> Bool 140 | testKendall xy | isNaN r1 = isNaN r2 141 | | otherwise = r1 == r2 142 | where 143 | r1 = kendallBruteForce xy 144 | r2 = kendall $ V.fromList xy 145 | 146 | testKendallSpecial :: Assertion 147 | testKendallSpecial = vs @=? 
map (\(xs, ys) -> kendall $ V.fromList $ zip xs ys) d 148 | where 149 | (d, vs) = unzip testData 150 | testData :: [(([Double], [Double]), Double)] 151 | testData = [ (([1, 2, 3, 1, 2], [1, 2, 1, 5, 2]), -0.375) 152 | , (([1, 1, 1, 3, 3], [3, 3, 3, 2, 5]), 0) 153 | ] 154 | 155 | 156 | kendallBruteForce :: [(Double, Double)] -> Double 157 | kendallBruteForce xy = (n_c - n_d) / sqrt ((n_0 - n_1) * (n_0 - n_2)) 158 | where 159 | allPairs = f xy 160 | (n_c, n_d, n_1, n_2) = foldl g (0,0,0,0) allPairs 161 | n_0 = fromIntegral.length $ allPairs 162 | g (!nc, !nd, !n1, !n2) ((x1, y1), (x2, y2)) 163 | | (x2 - x1) * (y2 - y1) > 0 = (nc+1, nd, n1, n2) 164 | | (x2 - x1) * (y2 - y1) < 0 = (nc, nd+1, n1, n2) 165 | | otherwise = if x1 == x2 166 | then if y1 == y2 167 | then (nc, nd, n1+1, n2+1) 168 | else (nc, nd, n1+1, n2) 169 | else (nc, nd, n1, n2+1) 170 | f (x:xs) = zip (repeat x) xs ++ f xs 171 | f _ = [] 172 | -------------------------------------------------------------------------------- /tests/Tests/Function.hs: -------------------------------------------------------------------------------- 1 | module Tests.Function ( tests ) where 2 | 3 | import Statistics.Function 4 | import Test.Tasty 5 | import Test.Tasty.QuickCheck 6 | import Test.QuickCheck 7 | import Tests.Helpers 8 | import qualified Data.Vector.Unboxed as U 9 | 10 | 11 | tests :: TestTree 12 | tests = testGroup "S.Function" 13 | [ testProperty "Sort is sort" p_sort 14 | , testAssertion "nextHighestPowerOfTwo is OK" p_nextHighestPowerOfTwo 15 | ] 16 | 17 | 18 | p_sort :: [Double] -> Property 19 | p_sort xs = 20 | not (null xs) ==> U.all (uncurry (<=)) (U.zip v $ U.tail v) 21 | where 22 | v = sort $ U.fromList xs 23 | 24 | p_nextHighestPowerOfTwo :: Bool 25 | p_nextHighestPowerOfTwo 26 | = all (\(good, is) -> all ((==good) . nextHighestPowerOfTwo) is) lists 27 | where 28 | pows = [1 .. 17 :: Int] 29 | lists = [ (2^m, [2^n+1 .. 
2^m]) | (n,m) <- pows `zip` tail pows ] 30 | -------------------------------------------------------------------------------- /tests/Tests/Helpers.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables #-} 2 | -- | Helpers for testing 3 | module Tests.Helpers ( 4 | -- * helpers 5 | T(..) 6 | , typeName 7 | , Double01(..) 8 | -- * IEEE 754 9 | , isDenorm 10 | -- * Generic QC tests 11 | , monotonicallyIncreases 12 | , monotonicallyIncreasesIEEE 13 | -- * HUnit helpers 14 | , testAssertion 15 | , testEquality 16 | -- * QC helpers 17 | , small 18 | , unsquare 19 | , shrinkFixedList 20 | ) where 21 | 22 | import Data.Typeable 23 | import Numeric.MathFunctions.Constants (m_tiny) 24 | import Test.Tasty 25 | import Test.Tasty.HUnit 26 | import Test.QuickCheck 27 | import qualified Numeric.IEEE as IEEE 28 | import qualified Test.Tasty.HUnit as HU 29 | 30 | -- | Phantom typed value used to select right instance in QC tests 31 | data T a = T 32 | 33 | -- | String representation of type name 34 | typeName :: Typeable a => T a -> String 35 | typeName = show . typeOf . 
typeParam 36 | where 37 | typeParam :: T a -> a 38 | typeParam _ = undefined 39 | 40 | -- | Check if Double denormalized 41 | isDenorm :: Double -> Bool 42 | isDenorm x = let ax = abs x in ax > 0 && ax < m_tiny 43 | 44 | -- | Generates Doubles in range [0,1] 45 | newtype Double01 = Double01 Double 46 | deriving (Show) 47 | instance Arbitrary Double01 where 48 | arbitrary = do 49 | (_::Int, x) <- fmap properFraction arbitrary 50 | return $ Double01 x 51 | 52 | ---------------------------------------------------------------- 53 | -- Generic QC 54 | ---------------------------------------------------------------- 55 | 56 | -- Check that function is nondecreasing 57 | monotonicallyIncreases :: (Ord a, Ord b) => (a -> b) -> a -> a -> Bool 58 | monotonicallyIncreases f x1 x2 = f (min x1 x2) <= f (max x1 x2) 59 | 60 | -- Check that function is nondecreasing taking rounding errors into 61 | -- account. 62 | -- 63 | -- In fact function is allowed to decrease less than one ulp in order 64 | -- to guard against problems with excess precision. 
On x86 FPU works 65 | -- with 80-bit numbers but doubles are 64-bit so rounding happens 66 | -- whenever values are moved from registers to memory 67 | monotonicallyIncreasesIEEE :: (Ord a, IEEE.IEEE b) => (a -> b) -> a -> a -> Bool 68 | monotonicallyIncreasesIEEE f x1 x2 = 69 | y1 <= y2 || (y1 - y2) < y2 * IEEE.epsilon 70 | where 71 | y1 = f (min x1 x2) 72 | y2 = f (max x1 x2) 73 | 74 | ---------------------------------------------------------------- 75 | -- HUnit helpers 76 | ---------------------------------------------------------------- 77 | 78 | testAssertion :: String -> Bool -> TestTree 79 | testAssertion str cont = testCase str $ HU.assertBool str cont 80 | 81 | testEquality :: (Show a, Eq a) => String -> a -> a -> TestTree 82 | testEquality msg a b = testCase msg $ HU.assertEqual msg a b 83 | 84 | unsquare :: (Arbitrary a, Show a, Testable b) => (a -> b) -> Property 85 | unsquare = forAll (small arbitrary) 86 | 87 | small :: Gen a -> Gen a 88 | small act = sized $ \n -> resize (smallish n) act 89 | where smallish = round . (sqrt :: Double -> Double) . fromIntegral . abs 90 | 91 | shrinkFixedList :: (a -> [a]) -> [a] -> [[a]] 92 | shrinkFixedList shr (x:xs) = map (:xs) (shr x) ++ map (x:) (shrinkFixedList shr xs) 93 | shrinkFixedList _ [] = [] 94 | -------------------------------------------------------------------------------- /tests/Tests/KDE.hs: -------------------------------------------------------------------------------- 1 | -- | Tests for Kernel density estimates. 
2 | module Tests.KDE ( 3 | tests 4 | )where 5 | 6 | import Data.Vector.Unboxed ((!)) 7 | import Numeric.Sum (kbn, sumVector) 8 | import Statistics.Sample.KernelDensity 9 | import Test.Tasty (TestTree, testGroup) 10 | import Test.Tasty.QuickCheck (testProperty) 11 | import Test.QuickCheck (Property, (==>), counterexample) 12 | import Text.Printf (printf) 13 | import qualified Data.Vector.Unboxed as U 14 | 15 | 16 | tests :: TestTree 17 | tests = testGroup "KDE" 18 | [ testProperty "integral(PDF) == 1" t_densityIsPDF 19 | ] 20 | 21 | t_densityIsPDF :: [Double] -> Property 22 | t_densityIsPDF vec 23 | = not (null vec) ==> test 24 | where 25 | (xs,ys) = kde 4096 (U.fromList vec) 26 | step = (xs ! 1) - (xs ! 0) 27 | integral = integratePDF step ys 28 | -- 29 | test = counterexample (printf "Integral %f" integral) 30 | $ abs (1 - integral) <= 1e-3 31 | 32 | 33 | 34 | integratePDF :: Double -> U.Vector Double -> Double 35 | integratePDF step vec 36 | = step * sumVector kbn (U.zipWith (*) vec weights) 37 | where 38 | n = U.length vec 39 | weights = U.generate n go 40 | where 41 | go i | i == 0 = 0.5 42 | | i == n-1 = 0.5 43 | | otherwise = 1 44 | -------------------------------------------------------------------------------- /tests/Tests/Matrix.hs: -------------------------------------------------------------------------------- 1 | module Tests.Matrix (tests) where 2 | 3 | import Statistics.Matrix hiding (map) 4 | import Statistics.Matrix.Algorithms 5 | import Test.Tasty (TestTree, testGroup) 6 | import Test.Tasty.QuickCheck (testProperty) 7 | import Test.QuickCheck 8 | import Tests.Matrix.Types 9 | import qualified Data.Vector.Unboxed as U 10 | 11 | t_row :: Mat Double -> Gen Property 12 | t_row ms@(Mat r _ xs) = do 13 | i <- choose (0,r-1) 14 | return $ row (fromMat ms) i === U.fromList (xs !! i) 15 | 16 | t_column :: Mat Double -> Gen Property 17 | t_column ms@(Mat _ c xs) = do 18 | i <- choose (0,c-1) 19 | return $ column (fromMat ms) i === U.fromList (map (!! 
i) xs) 20 | 21 | t_center :: Mat Double -> Property 22 | t_center ms@(Mat r c xs) = 23 | (xs !! (r `quot` 2)) !! (c `quot` 2) === center (fromMat ms) 24 | 25 | t_transpose :: Matrix -> Property 26 | t_transpose m = U.concat (map (column n) [0..rows m-1]) === toVector m 27 | where n = transpose m 28 | 29 | t_qr :: Property 30 | t_qr = property $ do 31 | a <- do (r,c) <- arbitrary 32 | fromMat <$> arbMatWith r c (fromIntegral <$> choose (-10, 10::Int)) 33 | let (q,r) = qr a 34 | a' = multiply q r 35 | pure $ counterexample ("A = \n"++show a) 36 | $ counterexample ("A' = \n"++show a') 37 | $ counterexample ("Q = \n"++show q) 38 | $ counterexample ("R = \n"++show r) 39 | $ dimension a == dimension a' 40 | && ( hasNaN a' 41 | || and (zipWith (\x y -> abs (x - y) < 1e-12) (toList a) (toList a')) 42 | ) 43 | 44 | tests :: TestTree 45 | tests = testGroup "Matrix" 46 | [ testProperty "t_row" t_row 47 | , testProperty "t_column" t_column 48 | , testProperty "t_center" t_center 49 | , testProperty "t_transpose" t_transpose 50 | , testProperty "t_qr" t_qr 51 | ] 52 | -------------------------------------------------------------------------------- /tests/Tests/Matrix/Types.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveFunctor #-} 2 | {-# OPTIONS_GHC -fno-warn-orphans #-} 3 | 4 | module Tests.Matrix.Types 5 | ( 6 | Mat(..) 
7 | , fromMat 8 | , toMat 9 | , arbMat 10 | , arbMatWith 11 | ) where 12 | 13 | import Control.Monad (join) 14 | import Control.Applicative ((<$>), (<*>)) 15 | import Statistics.Matrix (Matrix(..), fromList) 16 | import Test.QuickCheck 17 | import Tests.Helpers (shrinkFixedList, small) 18 | import qualified Data.Vector.Unboxed as U 19 | 20 | data Mat a = Mat { mrows :: Int , mcols :: Int 21 | , asList :: [[a]] } 22 | deriving (Eq, Ord, Show, Functor) 23 | 24 | fromMat :: Mat Double -> Matrix 25 | fromMat (Mat r c xs) = fromList r c (concat xs) 26 | 27 | toMat :: Matrix -> Mat Double 28 | toMat (Matrix r c v) = Mat r c . split . U.toList $ v 29 | where split xs@(_:_) = let (h,t) = splitAt c xs 30 | in h : split t 31 | split [] = [] 32 | 33 | instance (Arbitrary a) => Arbitrary (Mat a) where 34 | arbitrary = small $ join (arbMat <$> arbitrary <*> arbitrary) 35 | shrink (Mat r c xs) = Mat r c <$> shrinkFixedList (shrinkFixedList shrink) xs 36 | 37 | arbMat 38 | :: (Arbitrary a) 39 | => Positive (Small Int) 40 | -> Positive (Small Int) 41 | -> Gen (Mat a) 42 | arbMat r c = arbMatWith r c arbitrary 43 | 44 | arbMatWith 45 | :: (Arbitrary a) 46 | => Positive (Small Int) 47 | -> Positive (Small Int) 48 | -> Gen a 49 | -> Gen (Mat a) 50 | arbMatWith (Positive (Small r)) (Positive (Small c)) genA = 51 | Mat r c <$> vectorOf r (vectorOf c genA) 52 | 53 | instance Arbitrary Matrix where 54 | arbitrary = fromMat <$> arbitrary 55 | -- shrink = map fromMat . shrink . toMat 56 | -------------------------------------------------------------------------------- /tests/Tests/NonParametric/Table.hs: -------------------------------------------------------------------------------- 1 | module Tests.NonParametric.Table ( 2 | tableKSD 3 | , tableKS2D 4 | ) where 5 | 6 | -- Table for Kolmogorov-Smirnov statistics for standard normal 7 | -- distribution. Generated using R. 8 | -- 9 | -- First element of tuple is D second is sample for which it was 10 | -- calculated. 
11 | tableKSD :: [(Double,[Double])] 12 | tableKSD = 13 | [ (0.2012078,[1.360645,-0.3151904,-1.245443,0.1741977,-0.1421206,-1.798246,1.171594,-1.335844,-5.050093e-2,1.030063,-1.849005,0.6491455,-0.7028004]) 14 | , (0.2569956,[0.3884734,-1.227821,-0.4166262,0.429118,-0.9280124,0.8025867,-0.6703089,-0.2124872,0.1224496,0.1087734,-4.285284e-2,-1.039936,-0.7071956]) 15 | , (0.1960356,[-1.814745,-0.6327167,0.7082493,0.6264716,1.02061,-0.4094635,0.821026,-0.4255047,-0.4820728,-0.2239833,0.648517,1.114283,0.3610216]) 16 | , (0.2095746,[0.187011,0.1805498,0.4448389,0.6065506,0.2308673,0.5292549,-1.489902,-1.455191,0.5449396,-0.1436403,-0.7977073,-0.2693545,0.8260888,-1.474473,-2.158696e-2,-0.1455387]) 17 | , (0.1922603,[0.5772317,-1.255561,1.605823,0.4923361,0.2470848,1.176101,-0.3767689,-0.6896885,0.4509345,-0.5048447,0.9436534,1.025599,0.2998393,-3.415219e-2,1.264315,-1.44433,-1.646449e-2]) 18 | , (0.2173401,[1.812807,-0.8687497,-0.5710508,1.003647,1.142621,0.6546577,-6.083323e-3,1.628574e-2,1.067499,-1.953143,-0.6060077,1.90859,-0.7480553,0.6715162,-0.928759,1.862,1.604621,-0.2171044,-0.1835918]) 19 | , (0.2510541,[-0.4769572,1.062319,0.9952284,1.198086,1.015589,-0.4154523,-0.6711762,1.202902,0.2217098,5.381759e-2,0.6679715,0.2551287,-0.1371492]) 20 | , (0.1996022,[1.158607,-0.7354863,1.526559,-0.7855418,-2.82999,-0.6045106,-0.1830228,0.3306812,-0.819657,-1.223715,0.2536423,-0.4155781,1.447042]) 21 | , (0.2284761,[1.239965,0.8187093,0.5199788,1.172072,0.748259,1.869376e-2,0.1625921,-1.712065,0.7043582,-1.702702,-0.4792806,-0.1023351,0.1187189]) 22 | , (0.2337866,[0.9417261,-0.1024297,-0.7354359,1.099991,0.801984,-0.3745397,-1.749564,1.795771,1.099963,-0.605557,-2.035897,1.893603,-0.3468928,-0.2593938,2.100988,0.9665698,0.8757091,0.7696328,0.8730729,-0.3990352,2.04361,-0.4617864,-0.155021,2.15774,0.2687795,-0.9853512,-0.3264898,1.260026,4.267695,-0.5571145,0.6307067,-0.1691405,-1.730686]) 23 | , 
(0.3389167,[2.025542,-1.542641,-1.090238,3.99027,9.949129e-2,-0.8974433,-2.508418,6.390346,-2.675515,1.154459,1.688072,2.220727,-0.4743102]) 24 | , (0.4920231,[0.5192906,-3.260813,-1.245185,1.693084,3.561318,4.058924,2.27063,0.9446943,4.794159,-3.423733,0.8240817,0.644059,0.900175,1.932513,1.024586,2.82823,2.072192,-0.353231,-0.4319673,1.505952,1.0199,4.555054,2.364929,5.531467,3.279415,3.19821,2.726925,1.680027,-0.9041334,-0.8246765,-1.343979,8.454955,1.354581]) 25 | , (0.6727408,[-6.705672,-3.193988,-4.612611,-3.207994,-5.070172,-6.141169,-0.397149,-4.093359,-1.204801,-3.986585,-2.724662,0.9868107,-6.295266,-5.95839,-6.35114,-1.679555,-2.635889,-4.050329,1.557428,-2.548465,-0.9073924,-1.502018,-4.535688,-4.158818,-8.833434,-5.944697,-1.569672,-4.70399,-7.832059,-4.093708,-8.393417,-2.085432,-7.06495,-0.4230419,-3.046822,-3.23895,-0.9265873,-9.227822,3.293713,-5.593577,-5.942398,-4.358421,2.660044,-4.301572,-1.258879,0.1499903,3.572833,-3.19844,0.8652432,-0.3025793,-1.576673,-7.666265,-6.751463,-1.398944,-2.690656,-1.429654,7.508364e-2,0.7998344,-3.562074,-1.021431,1.342968,2.110244,-7.561497,-2.372083,-3.649193,-5.7723,-1.068083,0.7537809,-4.569546,-1.198005,-5.638384,-1.227226,-1.195852,-1.118175,-9.130527,0.9675821,-2.497391,0.5988562,-1.965783,-4.25741,-6.547006,-1.459294,-2.380556,-3.977307,-7.809006,-4.276819,-4.028746,-9.055546,-3.599239,-1.470512,-8.253329,-1.351687,-4.269324,-6.140353,-6.30808,-1.834091,-3.135146,-9.391791,3.117815,-5.554733,-2.556769,-3.287376,-2.064013,-5.741995,-5.047918,-4.808841,-1.488526,-0.2351115,-5.760833,-2.722929,-7.012353,2.281171,-3.890514,-1.516824,-1.41011,-1.828457,-5.561244,-3.472142,-10.16919,-0.4369042,-5.698953,-4.587462,-4.897086]) 26 | ] 27 | 28 | -- Table for 2-sample Kolmogorov-Smirnov statistics. 
Generated using R 29 | -- 30 | -- First element is D, second and third are samples 31 | tableKS2D :: [(Double,[Double],[Double])] 32 | tableKS2D = 33 | [ (0.2820513,[-0.4212928,2.146532,0.7585263,-0.5086105,-0.7725486,6.235548e-2,-0.1849861,0.861972,-0.1958534,-3.379697e-2,-1.316854,0.6701269],[0.4957582,0.4241167,0.9822869,0.4504248,-0.1749617,1.178098,-1.117222,-0.859273,0.3073736,0.4344583,-0.4761338,-1.332374,1.487291]) 34 | , (0.2820513,[-0.712252,0.7990333,-0.7968473,1.443609,1.163096,-1.349071,-0.1553941,-2.003104,-0.3400618,-0.7019282,0.183293,-0.2352167],[-0.4622455,-0.8132221,0.1161614,-1.472115e-2,1.001454,-6.557789e-2,-0.2531216,-1.032432,0.4105478,1.749614,0.9722899,5.850942e-2,-0.3352746]) 35 | , (0.2564103,[0.3509882,-0.2982833,1.314731,1.264223,-0.8156374,0.3734029,-3.288915e-2,0.6766016,0.9786335,0.1079949,-0.4211722,1.58656],[0.8024675,7.464538e-2,0.2739861,-2.334255e-2,0.5611802,0.6683374,0.4358206,0.349843,1.207834,1.402578,-0.4049183,0.4286042,1.665129]) 36 | , (0.1833333,[1.376196,9.926384e-2,2.199292,-2.04993,0.5585353,-0.4812132,0.1041527,2.084774,0.71194,-1.398245,-4.458574e-2,1.484945,-1.473182,1.020076,-0.7019646,0.2182066,-1.702963,-0.3522622,-0.8129267,-0.6338972],[-1.020371,0.3323861,1.513288,0.1958708,-1.0723,5.323446e-2,-0.9993713,-0.7046356,-0.6781067,-0.4471603,1.512042,-0.2650665,-4.765228e-2,-1.501205,1.228664,0.5332935,-0.2960315,-0.1509683]) 37 | , (0.5666667,[0.7145305,0.1255674,2.001531,0.1419216],[2.113474,-0.3352839,-0.4962429,-1.386079,0.6404667,-0.7145304,0.1084008,-0.9821421,-2.270472,-1.003846,-0.5644588,2.699695,-1.296494,-0.1538839,1.319094,-1.127544,0.3568889,0.2004726,-1.313291,0.3581084,0.3313498,0.9336278,0.9850203,-1.309506,1.170459,-0.7517466,-1.771269,0.7156381,-1.129691,0.877729]) 38 | , (0.5,[0.6950626,0.1643805,-0.3102472,0.4810762,0.1844602,1.338836,-0.8083386,-0.5482141,0.9532421,-0.2644837],[7.527945,-1.95654,1.513725,-1.318431,2.453895,0.2078194,0.7371092,2.834245,-2.134794,3.938259]) 39 | ] 40 | 
--------------------------------------------------------------------------------
/tests/Tests/Orphanage.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-- |
-- Orphan instances for common data types
module Tests.Orphanage where

import Control.Applicative
import Statistics.Distribution.Beta (BetaDistribution, betaDistr)
import Statistics.Distribution.Binomial (BinomialDistribution, binomial)
import Statistics.Distribution.CauchyLorentz
import Statistics.Distribution.ChiSquared (ChiSquared, chiSquared)
import Statistics.Distribution.Exponential (ExponentialDistribution, exponential)
import Statistics.Distribution.FDistribution (FDistribution, fDistribution)
import Statistics.Distribution.Gamma (GammaDistribution, gammaDistr)
import Statistics.Distribution.Geometric
import Statistics.Distribution.Hypergeometric
import Statistics.Distribution.Laplace (LaplaceDistribution, laplace)
import Statistics.Distribution.Lognormal (LognormalDistribution, lognormalDistr)
import Statistics.Distribution.NegativeBinomial (NegativeBinomialDistribution, negativeBinomial)
import Statistics.Distribution.Normal (NormalDistribution, normalDistr)
import Statistics.Distribution.Poisson (PoissonDistribution, poisson)
import Statistics.Distribution.StudentT
import Statistics.Distribution.Transform (LinearTransform, scaleAround)
import Statistics.Distribution.Uniform (UniformDistribution, uniformDistr)
import Statistics.Distribution.Weibull (WeibullDistribution, weibullDistr)
import Statistics.Distribution.DiscreteUniform (DiscreteUniform, discreteUniformAB)
import Statistics.Types

import Test.QuickCheck as QC


----------------------------------------------------------------
-- Arbitrary instances for distributions
--
-- Each generator draws distribution parameters from ranges where the
-- smart constructors below are well defined.
----------------------------------------------------------------

instance QC.Arbitrary BinomialDistribution where
  arbitrary = do
    nTrials <- QC.choose (1,100)
    pSucc   <- QC.choose (0,1)
    return $ binomial nTrials pSucc
instance QC.Arbitrary ExponentialDistribution where
  arbitrary = fmap exponential $ QC.choose (0,100)
instance QC.Arbitrary LaplaceDistribution where
  arbitrary = do
    loc   <- QC.choose (-10,10)
    scale <- QC.choose (0, 2)
    return $ laplace loc scale
instance QC.Arbitrary GammaDistribution where
  arbitrary = do
    shape <- QC.choose (0.1,100)
    scale <- QC.choose (0.1,100)
    return $ gammaDistr shape scale
instance QC.Arbitrary BetaDistribution where
  arbitrary = do
    alpha <- QC.choose (1e-3,10)
    beta  <- QC.choose (1e-3,10)
    return $ betaDistr alpha beta
instance QC.Arbitrary GeometricDistribution where
  arbitrary = fmap geometric $ QC.choose (1e-10,1)
instance QC.Arbitrary GeometricDistribution0 where
  arbitrary = fmap geometric0 $ QC.choose (1e-10,1)
instance QC.Arbitrary HypergeometricDistribution where
  arbitrary = do
    popSize <- QC.choose (1,20)
    nSucc   <- QC.choose (0,popSize)
    nDraw   <- QC.choose (1,popSize)
    return $ hypergeometric nSucc popSize nDraw
instance QC.Arbitrary LognormalDistribution where
  -- can't choose sigma too big, otherwise goes outside of double-float limit
  arbitrary = do
    mu    <- QC.choose (-100,100)
    sigma <- QC.choose (1e-10, 20)
    return $ lognormalDistr mu sigma
instance QC.Arbitrary NegativeBinomialDistribution where
  arbitrary = do
    nSucc <- QC.choose (1,100)
    pSucc <- QC.choose (1e-10,1)
    return $ negativeBinomial nSucc pSucc
instance QC.Arbitrary NormalDistribution where
  arbitrary = do
    mu    <- QC.choose (-100,100)
    sigma <- QC.choose (1e-3, 1e3)
    return $ normalDistr mu sigma
instance QC.Arbitrary PoissonDistribution where
  arbitrary = fmap poisson $ QC.choose (0,1)
instance QC.Arbitrary ChiSquared where
  arbitrary = fmap chiSquared $ QC.choose (1,100)
instance QC.Arbitrary UniformDistribution where
  arbitrary = do
    lo <- QC.arbitrary
    hi <- QC.arbitrary `suchThat` (/= lo)
    return $ uniformDistr lo hi
instance QC.Arbitrary WeibullDistribution where
  arbitrary = do
    shape <- QC.choose (1e-3,1e3)
    scale <- QC.choose (1e-3, 1e3)
    return $ weibullDistr shape scale
instance QC.Arbitrary CauchyDistribution where
  arbitrary = do
    center <- arbitrary
    scale  <- (abs <$> arbitrary) `suchThat` (> 0)
    return $ cauchyDistribution center scale
instance QC.Arbitrary StudentT where
  arbitrary = fmap studentT $ (abs <$> arbitrary) `suchThat` (>0)
instance QC.Arbitrary d => QC.Arbitrary (LinearTransform d) where
  arbitrary = scaleAround
    <$> QC.choose (-10,10)
    <*> QC.choose (1e-1,1e1)
    <*> arbitrary
instance QC.Arbitrary FDistribution where
  arbitrary = do
    df1 <- (abs <$> arbitrary) `suchThat` (>0)
    df2 <- (abs <$> arbitrary) `suchThat` (>0)
    return $ fDistribution df1 df2


instance (Arbitrary a, Ord a, RealFrac a) => Arbitrary (PValue a) where
  arbitrary = do
    -- keep only the fractional part so the value lands in [0,1)
    (_::Int, frac) <- fmap properFraction arbitrary
    return $ mkPValue $ abs frac

instance (Arbitrary a, Ord a, RealFrac a) => Arbitrary (CL a) where
  arbitrary = do
    -- keep only the fractional part so the significance lands in [0,1)
    (_::Int, frac) <- fmap properFraction arbitrary
    return $ mkCLFromSignificance $ abs frac

instance Arbitrary a => Arbitrary (NormalErr a) where
  arbitrary = fmap NormalErr arbitrary

instance Arbitrary a => Arbitrary (ConfInt a) where
  arbitrary = ConfInt <$> arbitrary <*> arbitrary <*> arbitrary

instance (Arbitrary (e a), Arbitrary a) => Arbitrary (Estimate e a) where
  arbitrary = Estimate <$> arbitrary <*> arbitrary

instance (Arbitrary a) => Arbitrary (UpperLimit a) where
  arbitrary = UpperLimit <$> arbitrary <*> arbitrary

instance (Arbitrary a) => Arbitrary (LowerLimit a) where
  arbitrary = LowerLimit <$> arbitrary <*> arbitrary

instance QC.Arbitrary DiscreteUniform where
  arbitrary = do
    a <- QC.choose (1,1000)
    b <- QC.choose (1,1000)
    return $ discreteUniformAB a b
--------------------------------------------------------------------------------
/tests/Tests/Parametric.hs:
--------------------------------------------------------------------------------
module Tests.Parametric (tests) where

import Data.Maybe (fromJust)
import Statistics.Test.StudentT
import Statistics.Types
import qualified Data.Vector.Unboxed as U
import Test.Tasty (testGroup)
import Tests.Helpers (testEquality)
import qualified Test.Tasty as Tst

tests :: Tst.TestTree
tests = testGroup "Parametric tests" studentTTests

-- 2 samples x 20 obs data
--
-- Both samples are samples from normal distributions with the same variance (= 1.0),
-- but their means are different (0.0 and 0.5, respectively).
--
-- You can reproduce the data with R (3.1.0) as follows:
--   set.seed(0)
--   sample1 = rnorm(20)
--   sample2 = rnorm(20, 0.5)
--   student = t.test(sample1, sample2, var.equal=T)
--   welch = t.test(sample1, sample2)
--   paired = t.test(sample1, sample2, paired=T)
sample1, sample2 :: U.Vector Double
sample1 = U.fromList [
  1.262954284880793e+00,
  -3.262333607056494e-01,
  1.329799262922501e+00,
  1.272429321429405e+00,
  4.146414344564082e-01,
  -1.539950041903710e+00,
  -9.285670347135381e-01,
  -2.947204467905602e-01,
  -5.767172747536955e-03,
  2.404653388857951e+00,
  7.635934611404596e-01,
  -7.990092489893682e-01,
  -1.147657009236351e+00,
  -2.894615736882233e-01,
  -2.992151178973161e-01,
  -4.115108327950670e-01,
  2.522234481561323e-01,
  -8.919211272845686e-01,
  4.356832993557186e-01,
  -1.237538421929958e+00]
sample2 = U.fromList [
  2.757321147216907e-01,
  8.773956459817011e-01,
  6.333363608148415e-01,
  1.304189509744908e+00,
  4.428932256161913e-01,
  1.003607972233726e+00,
  1.585769362145687e+00,
  -1.909538396968303e-01,
  -7.845993538721883e-01,
  5.467261721883520e-01,
  2.642934435604988e-01,
  -4.288825501025439e-02,
  6.668968254321778e-02,
  -1.494716467962331e-01,
  1.226750747385451e+00,
  1.651911754087200e+00,
  1.492160365445798e+00,
  7.048689050811874e-02,
  1.738304100853380e+00,
  2.206537181457307e-01]


-- | Pin a test's p-value against the reference value @pVal@ computed
-- by R: the result must not be significant at exactly @pVal@, but
-- must become significant once the threshold is nudged just above it
-- (i.e. the computed p-value lies in the interval (pVal, pVal+1e-5]).
testTTest :: String
          -> PValue Double
          -> Test d
          -> [Tst.TestTree]
testTTest name pVal test =
  [ testEquality name (isSignificant pVal test) NotSignificant
  , testEquality name (isSignificant (mkPValue $ pValue pVal + 1e-5) test)
      Significant
  ]

studentTTests :: [Tst.TestTree]
studentTTests = concat
  [ -- R: t.test(sample1, sample2, alt="two.sided", var.equal=T)
    testTTest "two-sample t-test SamplesDiffer Student"
      (mkPValue 0.03410) (fromJust $ studentTTest SamplesDiffer sample1 sample2)
    -- R: t.test(sample1, sample2, alt="two.sided", var.equal=F)
  , testTTest "two-sample t-test SamplesDiffer Welch"
      (mkPValue 0.03483) (fromJust $ welchTTest SamplesDiffer sample1 sample2)
    -- R: t.test(sample1, sample2, alt="two.sided", paired=T)
  , testTTest "two-sample t-test SamplesDiffer Paired"
      (mkPValue 0.03411) (fromJust $ pairedTTest SamplesDiffer sample12)
    -- R: t.test(sample1, sample2, alt="less", var.equal=T)
  , testTTest "two-sample t-test BGreater Student"
      (mkPValue 0.01705) (fromJust $ studentTTest BGreater sample1 sample2)
    -- R: t.test(sample1, sample2, alt="less", var.equal=F)
  , testTTest "two-sample t-test BGreater Welch"
      (mkPValue 0.01741) (fromJust $ welchTTest BGreater sample1 sample2)
    -- R: t.test(sample1, sample2, alt="less", paired=T)
    -- (comment fixed: this case calls pairedTTest, so paired=T, not paired=F)
  , testTTest "two-sample t-test BGreater Paired"
      (mkPValue 0.01705) (fromJust $ pairedTTest BGreater sample12)
  ]
  where sample12 = U.zip sample1 sample2
--------------------------------------------------------------------------------
/tests/Tests/Quantile.hs:
--------------------------------------------------------------------------------
{-#
LANGUAGE ViewPatterns #-}
-- |
-- Tests for quantile
module Tests.Quantile (tests) where

import Control.Exception
import qualified Data.Vector.Unboxed as U
import Test.Tasty
import Test.Tasty.HUnit
import Test.Tasty.QuickCheck hiding (sample)
import Numeric.MathFunctions.Comparison (ulpDelta,ulpDistance)
import Statistics.Quantile

tests :: TestTree
tests = testGroup "Quantiles"
  [ testCase "R alg. 4" $ compareWithR cadpw (0.00, 0.50, 2.50, 8.25, 10.00)
  , testCase "R alg. 5" $ compareWithR hazen (0.00, 1.00, 5.00, 9.00, 10.00)
  , testCase "R alg. 6" $ compareWithR spss (0.00, 0.75, 5.00, 9.25, 10.00)
  , testCase "R alg. 7" $ compareWithR s (0.000, 1.375, 5.000, 8.625,10.00)
  , testCase "R alg. 8" $ compareWithR medianUnbiased
      (0.0, 0.9166666666666667, 5.000000000000003, 9.083333333333334, 10.0)
  , testCase "R alg. 9" $ compareWithR normalUnbiased
      (0.0000, 0.9375, 5.0000, 9.0625, 10.0000)
  , testProperty "alg 7." propWeightedAverage
    -- Test failures
  , testCase "weightedAvg should throw errors" $ do
      let xs  = U.fromList [1,2,3]
          xs0 = U.fromList []
      shouldError "Empty sample" $ weightedAvg 1 4 xs0
      shouldError "N=0" $ weightedAvg 1 0 xs
      shouldError "N=1" $ weightedAvg 1 1 xs
      shouldError "k<0" $ weightedAvg (-1) 4 xs
      shouldError "k>N" $ weightedAvg 5 4 xs
  , testCase "quantile should throw errors" $ do
      let xs  = U.fromList [1,2,3]
          xs0 = U.fromList []
      shouldError "Empty xs" $ quantile s 1 4 xs0
      shouldError "N=0" $ quantile s 1 0 xs
      shouldError "N=1" $ quantile s 1 1 xs
      shouldError "k<0" $ quantile s (-1) 4 xs
      shouldError "k>N" $ quantile s 5 4 xs
    --
  , testProperty "quantiles are OK" propQuantiles
  , testProperty "quantilesVec are OK" propQuantilesVec
  ]

-- Fixed reference sample used by the R-comparison test cases above.
sample :: U.Vector Double
sample = U.fromList [0, 1, 2.5, 7.5, 9, 10]

-- Compare quantiles implementation with reference R implementation
-- (expected quartiles q0..q4 computed in R for the same algorithm).
compareWithR :: ContParam -> (Double,Double,Double,Double,Double) -> Assertion
compareWithR p (q0,q1,q2,q3,q4) = do
  assertEqual "Q 0" q0 $ quantile p 0 4 sample
  assertEqual "Q 1" q1 $ quantile p 1 4 sample
  assertEqual "Q 2" q2 $ quantile p 2 4 sample
  assertEqual "Q 3" q3 $ quantile p 3 4 sample
  assertEqual "Q 4" q4 $ quantile p 4 4 sample

-- | @weightedAvg@ must agree (up to a few ULPs) with @quantile s@.
-- NOTE: renamed from the misspelled @propWeigtedAverage@; the module
-- exports only 'tests', so the rename is interface-safe.
propWeightedAverage :: Positive Int -> Positive Int -> Property
propWeightedAverage (Positive k) (Positive q) =
  (q >= 2 && k <= q) ==> let q1 = weightedAvg k q sample
                             q2 = quantile s k q sample
                         in counterexample ("weightedAvg = " ++ show q1)
                          $ counterexample ("quantile = " ++ show q2)
                          $ counterexample ("delta in ulps = " ++ show (ulpDelta q1 q2))
                          $ ulpDistance q1 q2 <= 16

-- | 'quantiles' must agree with computing each quantile separately.
propQuantiles :: Positive Int -> Int -> Int -> NonEmptyList Double -> Property
propQuantiles (Positive n)
              ((`mod` n) -> k1)
              ((`mod` n) -> k2)
              (NonEmpty xs)
  = n >= 2
  ==> [x1,x2] == quantiles s [k1,k2] n rndXs
  where
    rndXs = U.fromList xs
    x1 = quantile s k1 n rndXs
    x2 = quantile s k2 n rndXs

-- | 'quantilesVec' must agree with computing each quantile separately.
propQuantilesVec :: Positive Int -> Int -> Int -> NonEmptyList Double -> Property
propQuantilesVec (Positive n)
                 ((`mod` n) -> k1)
                 ((`mod` n) -> k2)
                 (NonEmpty xs)
  = n >= 2
  ==> U.fromList [x1,x2] == quantilesVec s (U.fromList [k1,k2]) n rndXs
  where
    rndXs = U.fromList xs
    x1 = quantile s k1 n rndXs
    x2 = quantile s k2 n rndXs


-- | Assert that forcing @x@ raises a call to 'error'.
shouldError :: String -> a -> Assertion
shouldError nm x = do
  r <- try (evaluate x)
  case r of
    Left (ErrorCall{}) -> return ()
    Right _ -> assertFailure ("Should call error: " ++ nm)
--------------------------------------------------------------------------------
/tests/Tests/Serialization.hs:
--------------------------------------------------------------------------------
-- |
-- Tests for data serialization instances
module Tests.Serialization where

import Data.Binary (Binary,decode,encode)
import Data.Aeson (FromJSON,ToJSON,Result(..),toJSON,fromJSON)
import Data.Typeable

import Statistics.Distribution.Beta (BetaDistribution)
import Statistics.Distribution.Binomial (BinomialDistribution)
import Statistics.Distribution.CauchyLorentz
import Statistics.Distribution.ChiSquared (ChiSquared)
import Statistics.Distribution.Exponential (ExponentialDistribution)
import Statistics.Distribution.FDistribution (FDistribution)
import Statistics.Distribution.Gamma (GammaDistribution)
import Statistics.Distribution.Geometric
import Statistics.Distribution.Hypergeometric
import Statistics.Distribution.Laplace (LaplaceDistribution)
import Statistics.Distribution.Lognormal (LognormalDistribution)
import Statistics.Distribution.NegativeBinomial (NegativeBinomialDistribution)
import Statistics.Distribution.Normal
(NormalDistribution)
import Statistics.Distribution.Poisson (PoissonDistribution)
import Statistics.Distribution.StudentT
import Statistics.Distribution.Transform (LinearTransform)
import Statistics.Distribution.Uniform (UniformDistribution)
import Statistics.Distribution.Weibull (WeibullDistribution)
import Statistics.Types

import Test.Tasty (TestTree, testGroup)
import Test.Tasty.QuickCheck (testProperty)
import Test.QuickCheck as QC

import Tests.Helpers
import Tests.Orphanage ()


tests :: TestTree
tests = testGroup "Test for data serialization"
  [ serializationTests (T :: T (CL Float))
  , serializationTests (T :: T (CL Double))
  , serializationTests (T :: T (PValue Float))
  , serializationTests (T :: T (PValue Double))
  , serializationTests (T :: T (NormalErr Double))
  , serializationTests (T :: T (ConfInt Double))
  , serializationTests' "T (Estimate NormalErr Double)" (T :: T (Estimate NormalErr Double))
  , serializationTests' "T (Estimate ConfInt Double)" (T :: T (Estimate ConfInt Double))
  , serializationTests (T :: T (LowerLimit Double))
  , serializationTests (T :: T (UpperLimit Double))
    -- Distributions
  , serializationTests (T :: T BetaDistribution )
  , serializationTests (T :: T CauchyDistribution )
  , serializationTests (T :: T ChiSquared )
  , serializationTests (T :: T ExponentialDistribution )
  , serializationTests (T :: T GammaDistribution )
  , serializationTests (T :: T LaplaceDistribution )
  , serializationTests (T :: T LognormalDistribution )
  , serializationTests (T :: T NegativeBinomialDistribution )
  , serializationTests (T :: T NormalDistribution )
  , serializationTests (T :: T UniformDistribution )
  , serializationTests (T :: T WeibullDistribution )
  , serializationTests (T :: T StudentT )
  , serializationTests (T :: T (LinearTransform NormalDistribution))
  , serializationTests (T :: T FDistribution )
  , serializationTests (T :: T BinomialDistribution )
  , serializationTests (T :: T GeometricDistribution )
  , serializationTests (T :: T GeometricDistribution0 )
  , serializationTests (T :: T HypergeometricDistribution )
  , serializationTests (T :: T PoissonDistribution )
  ]


-- | Round-trip tests for a type, named after the type via 'Typeable'.
serializationTests
  :: (Eq a, Typeable a, Binary a, Show a, Read a, ToJSON a, FromJSON a, Arbitrary a)
  => T a -> TestTree
serializationTests t = serializationTests' (typeName t) t

-- Not all types are Typeable, unfortunately
serializationTests'
  :: (Eq a, Binary a, Show a, Read a, ToJSON a, FromJSON a, Arbitrary a)
  => String -> T a -> TestTree
serializationTests' name t = testGroup ("Tests for: " ++ name)
  [ testProperty "show/read" (p_showRead t)
  , testProperty "binary" (p_binary t)
  , testProperty "aeson" (p_aeson t)
  ]



-- decode . encode must be the identity (Data.Binary round trip)
p_binary :: (Eq a, Binary a) => T a -> a -> Bool
p_binary _ a = a == (decode . encode) a

-- read . show must be the identity
p_showRead :: (Eq a, Read a, Show a) => T a -> a -> Bool
p_showRead _ a = a == (read . show) a

-- fromJSON . toJSON must succeed and be the identity (aeson round trip)
p_aeson :: (Eq a, ToJSON a, FromJSON a) => T a -> a -> Bool
p_aeson _ a = Data.Aeson.Success a == (fromJSON .
toJSON) a
--------------------------------------------------------------------------------
/tests/Tests/Transform.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ViewPatterns #-}
module Tests.Transform
    (
      tests
    ) where

import Data.Bits ((.&.), shiftL)
import Data.Complex (Complex((:+)))
import Numeric.Sum (kbn, sumVector)
import Statistics.Function (within)
import Statistics.Transform (CD, dct, fft, idct, ifft)
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.QuickCheck (testProperty)
import Test.QuickCheck ( Positive(..), Arbitrary(..), Blind(..), (==>), Gen
                       , choose, vectorOf, counterexample, forAll)
import Test.QuickCheck.Property (Property(..))
import Tests.Helpers (testAssertion)
import Text.Printf (printf)
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U


tests :: TestTree
tests = testGroup "fft" [
    testProperty "t_impulse" t_impulse
  , testProperty "t_impulse_offset" t_impulse_offset
  , testProperty "ifft . fft = id" (t_fftInverse $ ifft . fft)
  , testProperty "fft . ifft = id" (t_fftInverse $ fft . ifft)
  , testProperty "idct . dct = id [up to scale]"
      (t_fftInverse (\v -> U.map (/ (2 * fromIntegral (U.length v))) $ idct $ dct v))
    -- BUG FIX: this property previously round-tripped @idct . dct@
    -- again (a duplicate of the property above); it now actually
    -- tests the @dct . idct@ direction it names.  Both compositions
    -- scale by 2N, so the normalisation factor is unchanged.
  , testProperty "dct . idct = id [up to scale]"
      (t_fftInverse (\v -> U.map (/ (2 * fromIntegral (U.length v))) $ dct $ idct v))
    -- Exact small size DCT
    -- 1
  , testDCT [1] $ [2]
    -- 2
  , testDCT [1,0] $ map (*2) [1, cos (pi/4) ]
  , testDCT [0,1] $ map (*2) [1, cos (3*pi/4) ]
    -- 4
  , testDCT [1,0,0,0] $ map (*2) [1, cos( pi/8), cos( 2*pi/8), cos( 3*pi/8)]
  , testDCT [0,1,0,0] $ map (*2) [1, cos(3*pi/8), cos( 6*pi/8), cos( 9*pi/8)]
  , testDCT [0,0,1,0] $ map (*2) [1, cos(5*pi/8), cos(10*pi/8), cos(15*pi/8)]
  , testDCT [0,0,0,1] $ map (*2) [1, cos(7*pi/8), cos(14*pi/8), cos(21*pi/8)]
    -- Exact small size IDCT
    -- 1
  , testIDCT [1] [1]
    -- 2
  , testIDCT [1,0] [1, 1 ]
  , testIDCT [0,1] $ map (*2) [cos(pi/4), cos(3*pi/4)]
    -- 4
  , testIDCT [1,0,0,0] [1, 1, 1, 1 ]
  , testIDCT [0,1,0,0] $ map (*2) [cos( pi/8), cos( 3*pi/8), cos( 5*pi/8), cos( 7*pi/8) ]
  , testIDCT [0,0,1,0] $ map (*2) [cos( 2*pi/8), cos( 6*pi/8), cos(10*pi/8), cos(14*pi/8) ]
  , testIDCT [0,0,0,1] $ map (*2) [cos( 3*pi/8), cos( 9*pi/8), cos(15*pi/8), cos(21*pi/8) ]
  ]

-- A single real-valued impulse at the beginning of an otherwise zero
-- vector should be replicated in every real component of the result,
-- and all the imaginary components should be zero.
t_impulse :: Double -> Positive Int -> Bool
t_impulse k (Positive m) = U.all (c_near i) (fft v)
  where v = i `G.cons` G.replicate (n-1) 0
        i = k :+ 0
        -- keep the FFT size a small power of two (<= 2^6)
        n = 1 `shiftL` (m .&. 6)

-- If a real-valued impulse is offset from the beginning of an
-- otherwise zero vector, the sum-of-squares of each component of the
-- result should equal the square of the impulse.
t_impulse_offset :: Double -> Positive Int -> Positive Int -> Property
t_impulse_offset k (Positive x) (Positive m)
  -- For numbers smaller than 1e-162 their square underflows and test
  -- fails spuriously
  = abs k >= 1e-100 ==> U.all ok (fft v)
  where v = G.concat [G.replicate xn 0, G.singleton i, G.replicate (n-xn-1) 0]
        ok (re :+ im) = within ulps (re*re + im*im) (k*k)
        i = k :+ 0
        xn = x `rem` n
        n = 1 `shiftL` (m .&. 6)

-- Test that (ifft . fft ≈ id)
--
-- Approximate equality here is tricky. Smaller values of vector tend
-- to have large relative error. Thus we should test that vectors as
-- whole are approximate equal.
t_fftInverse :: (HasNorm (U.Vector a), U.Unbox a, Num a, Show a, Arbitrary a)
             => (U.Vector a -> U.Vector a) -> Property
t_fftInverse roundtrip =
  forAll (Blind <$> genFftVector) $ \(Blind x) ->
    let n = G.length x
        x' = roundtrip x
        d = G.zipWith (-) x x'
        nd = vectorNorm d
        nx = vectorNorm x
    in counterexample "Original vector"
     $ counterexample (show x )
     $ counterexample "Transformed one"
     $ counterexample (show x')
     $ counterexample (printf "Length = %i" n)
     $ counterexample (printf "|x - x'| / |x| = %.6g" (nd / nx))
     $ nd <= 3e-14 * nx

-- Test discrete cosine transform
testDCT :: [Double] -> [Double] -> TestTree
testDCT (U.fromList -> vec) (U.fromList -> res)
  = testAssertion ("DCT test for " ++ show vec)
  $ vecEqual 3e-14 (dct vec) res

-- Test inverse discrete cosine transform
testIDCT :: [Double] -> [Double] -> TestTree
testIDCT (U.fromList -> vec) (U.fromList -> res)
  = testAssertion ("IDCT test for " ++ show vec)
  $ vecEqual 3e-14 (idct vec) res



----------------------------------------------------------------

-- With an error tolerance of 8 ULPs, a million QuickCheck tests are
-- likely to all succeed. With a tolerance of 7, we fail around the
-- half million mark.
ulps :: Int
ulps = 8

c_near :: CD -> CD -> Bool
c_near (a :+ b) (c :+ d) = within ulps a c && within ulps b d

-- Arbitrary vector for FFT or DCT
genFftVector :: (U.Unbox a, Arbitrary a) => Gen (U.Vector a)
genFftVector = do
  n <- (2^) <$> choose (1,9::Int) -- Size of vector
  G.fromList <$> vectorOf n arbitrary -- Vector to transform

-- Ad-hoc type class for calculation of vector norm
class HasNorm a where
  vectorNorm :: a -> Double

instance HasNorm (U.Vector Double) where
  vectorNorm = sqrt . sumVector kbn . U.map (\x -> x*x)

instance HasNorm (U.Vector CD) where
  vectorNorm = sqrt . sumVector kbn . U.map (\(x :+ y) -> x*x + y*y)

-- Approximate equality for vectors
vecEqual :: Double -> U.Vector Double -> U.Vector Double -> Bool
vecEqual ε v u
  = vectorNorm (U.zipWith (-) v u) < ε * vectorNorm v
--------------------------------------------------------------------------------
/tests/doctest.hs:
--------------------------------------------------------------------------------
import Test.DocTest (doctest)

main :: IO ()
main = doctest ["-XHaskell2010", "Statistics"]

--------------------------------------------------------------------------------
/tests/tests.hs:
--------------------------------------------------------------------------------
import Test.Tasty (defaultMain,testGroup)

import qualified Tests.Distribution
import qualified Tests.Function
import qualified Tests.KDE
import qualified Tests.Matrix
import qualified Tests.NonParametric
import qualified Tests.Parametric
import qualified Tests.Transform
import qualified Tests.Correlation
import qualified Tests.Serialization
import qualified Tests.Quantile

main :: IO ()
main = defaultMain $
testGroup "statistics" 16 | [ Tests.Distribution.tests 17 | , Tests.Function.tests 18 | , Tests.KDE.tests 19 | , Tests.Matrix.tests 20 | , Tests.NonParametric.tests 21 | , Tests.Parametric.tests 22 | , Tests.Transform.tests 23 | , Tests.Correlation.tests 24 | , Tests.Serialization.tests 25 | , Tests.Quantile.tests 26 | ] 27 | -------------------------------------------------------------------------------- /tests/utils/Makefile: -------------------------------------------------------------------------------- 1 | C = gcc 2 | CFLAGS = -W -Wall -O2 -std=c99 3 | LDFLAGS = -lfftw3 4 | 5 | .PHONY: all clean 6 | 7 | all : fftw 8 | clean : 9 | rm -rf fftw *.o 10 | -------------------------------------------------------------------------------- /tests/utils/fftw.c: -------------------------------------------------------------------------------- 1 | /* Generate some test cases using fftw3 */ 2 | #include 3 | #include 4 | #include 5 | 6 | void dump_vector(int n, double* vec) { 7 | for(int i = 0; i < n; i++) 8 | printf("%20.15f ", vec[i]); 9 | printf("\n"); 10 | } 11 | 12 | void dct(int flag, int n) { 13 | double* in = malloc( n * sizeof(double)); 14 | double* out = malloc( n * sizeof(double)); 15 | // 16 | fftw_plan plan = fftw_plan_r2r_1d(n, in, out, flag, FFTW_ESTIMATE); 17 | for( int k = 0; k < n; k++) { 18 | // Init input vector 19 | for( int i = 0; i < n; i++) 20 | in[i] = 0; 21 | in[k] = 1; 22 | // Perform DFT 23 | fftw_execute(plan); 24 | // Print results 25 | dump_vector(n, in ); 26 | dump_vector(n, out); 27 | printf("\n"); 28 | } 29 | // 30 | free(in); 31 | free(out); 32 | fftw_destroy_plan(plan); 33 | } 34 | 35 | int main(void) 36 | { 37 | printf("DCT II (the DCT)\n"); 38 | dct( FFTW_REDFT10, 2); 39 | dct( FFTW_REDFT10, 4); 40 | 41 | printf("DCT III (Inverse DCT)\n"); 42 | dct( FFTW_REDFT01, 2); 43 | dct( FFTW_REDFT01, 4); 44 | 45 | return 0; 46 | } 47 | --------------------------------------------------------------------------------