├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .hgignore ├── .hgtags ├── LICENSE ├── README.markdown ├── Statistics ├── Autocorrelation.hs ├── ConfidenceInt.hs ├── Correlation.hs ├── Correlation │ └── Kendall.hs ├── Distribution.hs ├── Distribution │ ├── Beta.hs │ ├── Binomial.hs │ ├── CauchyLorentz.hs │ ├── ChiSquared.hs │ ├── DiscreteUniform.hs │ ├── Exponential.hs │ ├── FDistribution.hs │ ├── Gamma.hs │ ├── Geometric.hs │ ├── Hypergeometric.hs │ ├── Laplace.hs │ ├── Lognormal.hs │ ├── NegativeBinomial.hs │ ├── Normal.hs │ ├── Poisson.hs │ ├── Poisson │ │ └── Internal.hs │ ├── StudentT.hs │ ├── Transform.hs │ ├── Uniform.hs │ └── Weibull.hs ├── Function.hs ├── Internal.hs ├── Quantile.hs ├── Regression.hs ├── Resampling.hs ├── Resampling │ └── Bootstrap.hs ├── Sample.hs ├── Sample │ ├── Histogram.hs │ ├── Internal.hs │ ├── KernelDensity.hs │ ├── KernelDensity │ │ └── Simple.hs │ ├── Normalize.hs │ └── Powers.hs ├── Test │ ├── ChiSquared.hs │ ├── Internal.hs │ ├── KolmogorovSmirnov.hs │ ├── KruskalWallis.hs │ ├── MannWhitneyU.hs │ ├── Runs.hs │ ├── StudentT.hs │ ├── Types.hs │ └── WilcoxonT.hs ├── Transform.hs ├── Types.hs └── Types │ └── Internal.hs ├── bench-papi └── Bench.hs ├── bench-time └── Bench.hs ├── benchmark └── bench.hs ├── cabal.project ├── changelog.md ├── dense-linear-algebra ├── LICENSE ├── README.md ├── dense-linear-algebra.cabal ├── src │ └── Statistics │ │ ├── Matrix.hs │ │ └── Matrix │ │ ├── Algorithms.hs │ │ ├── Function.hs │ │ ├── Mutable.hs │ │ └── Types.hs └── test │ ├── LibSpec.hs │ └── Spec.hs ├── examples └── kde │ ├── KDE.hs │ ├── data │ └── faithful.csv │ ├── kde.html │ └── kde.tpl ├── statistics.cabal └── tests ├── Tests ├── ApproxEq.hs ├── Correlation.hs ├── Distribution.hs ├── ExactDistribution.hs ├── Function.hs ├── Helpers.hs ├── KDE.hs ├── Matrix.hs ├── Matrix │ └── Types.hs ├── NonParametric.hs ├── NonParametric │ └── Table.hs ├── Orphanage.hs ├── Parametric.hs ├── Quantile.hs ├── Serialization.hs └── Transform.hs ├── 
doctest.hs ├── tests.hs └── utils ├── Makefile └── fftw.c /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | # Trigger the workflow on push or pull request, but only for the master branch 4 | on: 5 | pull_request: 6 | push: 7 | branches: [master] 8 | 9 | defaults: 10 | run: 11 | shell: bash 12 | 13 | jobs: 14 | cabal: 15 | name: ${{ matrix.os }} / ghc ${{ matrix.ghc }} 16 | runs-on: ${{ matrix.os }} 17 | strategy: 18 | matrix: 19 | include: 20 | # Linux 21 | - { cabal: "3.12", os: ubuntu-latest, ghc: "8.4.4" } 22 | - { cabal: "3.12", os: ubuntu-latest, ghc: "8.6.5" } 23 | - { cabal: "3.12", os: ubuntu-latest, ghc: "8.8.4" } 24 | - { cabal: "3.12", os: ubuntu-latest, ghc: "8.10.7" } 25 | - { cabal: "3.12", os: ubuntu-latest, ghc: "9.0.2" } 26 | - { cabal: "3.12", os: ubuntu-latest, ghc: "9.2.8" } 27 | - { cabal: "3.12", os: ubuntu-latest, ghc: "9.4.8" } 28 | - { cabal: "3.12", os: ubuntu-latest, ghc: "9.6.6" } 29 | - { cabal: "3.12", os: ubuntu-latest, ghc: "9.8.4" } 30 | - { cabal: "3.12", os: ubuntu-latest, ghc: "9.10.1" } 31 | # Fails to resolve aeson [2025.01.14] 32 | # - { cabal: "3.12", os: ubuntu-latest, ghc: "9.12.1" } 33 | fail-fast: false 34 | 35 | steps: 36 | # ---------------- 37 | - uses: actions/checkout@v4 38 | # ---------------- 39 | - uses: haskell-actions/setup@v2 40 | id: setup-haskell-cabal 41 | name: Setup Haskell 42 | with: 43 | ghc-version: ${{ matrix.ghc }} 44 | cabal-version: ${{ matrix.cabal }} 45 | # ---------------- 46 | - uses: actions/cache@v3 47 | name: Cache ~/.cabal/store 48 | with: 49 | path: ${{ steps.setup-haskell-cabal.outputs.cabal-store }} 50 | # FIX: was `github.Shah`, which is not a valid `github` context property and
 # expands to the empty string — every commit reused one stale cache key.
 key: ${{ runner.os }}-${{ matrix.ghc }}--${{ github.sha }}-CACHE_V3 51 | # ---------------- 52 | - name: "Install PAPI" 53 | run: | 54 | sudo apt-get install -y libpapi-dev 55 | # ---------------- 56 | - name: Versions 57 | run: | 58 | cabal -V 59 | ghc -V 60 | # ---------------- 61 | - name: Make sdist 62 | 
run: | 63 | mkdir sdist 64 | cabal sdist -o sdist 65 | - name: Unpack 66 | run: | 67 | mkdir unpacked 68 | tar -C unpacked -xzf sdist/statistics*tar.gz 69 | # ---------------- 70 | - name: cabal check 71 | run: | 72 | cd unpacked/statistics-* 73 | cabal -vnormal check 74 | # ---------------- 75 | - name: Build 76 | run: | 77 | cd unpacked/statistics-* 78 | cabal configure --haddock-all --enable-tests --enable-benchmarks 79 | cabal build all --write-ghc-environment-files=always 80 | # ---------------- 81 | - name: Test 82 | run: | 83 | cd unpacked/statistics-* 84 | cabal test all 85 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist 2 | dist-newstyle 3 | .ghc.environment.* 4 | cabal.sandbox.config 5 | .stack-work 6 | TAGS 7 | -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | ^dist$ 2 | \.(?:aux|eventlog|h[ip]|log|[oa]|orig|prof|ps|swp)$ 3 | ~$ 4 | syntax: glob 5 | .\#* 6 | -------------------------------------------------------------------------------- /.hgtags: -------------------------------------------------------------------------------- 1 | b0800d494d66a079356c40fc0a0f6c7943cfce3f 0.1 2 | c827bab8b4821bb3251b71fa350f13420849f3c9 0.2.2 3 | d270e9663fba54f27698b704c05258038549901b 0.2.1 4 | 1f3d1ad64c4fa810550615a085a5df37675d9d33 0.2 5 | b71affe5caf6d20ec80c2800797053d56c39a283 0.3.6 6 | 9daa8a8be77e1c1d80e33f33870e02cfb972af45 0.3.3 7 | 5befb235d2697a79ebaea6e1c329a86ad1ad65b7 0.3.4 8 | 6d59726b207a8e4c348ce3a4cc5fa8bae78db0a3 0.3.2 9 | e36be8ad5175714058d9c31f3ed8fc940bcbb32c 0.3 10 | 7d9e5ad43b3f40c05cd82a82b6dc9507caee5a44 0.3.1 11 | b38c4bbadd4f7a4c8fca2e821019e7eee24a546c 0.3.5 12 | 007059a2c8874c822d6285f85532ec78c05b54ec 0.4 13 | d6696151a449d08dcdb5070c7682dde824512b7a 0.4.1 14 | 
ea8afae6deac29b15938122c61fa0c5ea1bee68c 0.5.1.0 15 | 96e1f328a83222884da8e44f6bba000378738e3a 0.5.1.1 16 | 9e58943cd825fe7e1a774bcaf0c26293ae7586a3 0.5.1.2 17 | 612bf90089edda4c337bfdae29c33cac45dd05ba 0.5.0.0 18 | 6658e21f4841fa9e0c05f63589ddb08c302cd9c1 0.6.0.0 19 | 77600bbdf0687644238faa1470c45c356f5cb4f2 0.6.0.1 20 | c28c06a9b6a82c4a33abdd86cffbd629b1631dc8 0.7.0.0 21 | b8ca72bbe71b5945e112ff45d59d4b45fdabad9c 0.8.0.0 22 | 83f39a545916a9da040f485d1d7f1ca1f5318ca4 0.8.0.5 23 | a60219fe5db2dd4820abef3b71926b29eea3ef86 0.8.0.1 24 | 6b6af0a56b20124868489664902ebd228b5afc29 0.8.0.2 25 | 9296e74ddd0ca476e0bcd1bd855476dafc6c825b 0.8.0.3 26 | b537df70181bbf4c267d658939df0009db73298a 0.8.0.4 27 | dc13e819406268870a9e442a5d329207d0012f5f 0.9.0.0 28 | 16eb25b56e88b8bb119f4b4b096cd951bb89f752 0.10.0.1 29 | e64eaba9a6eece4145ca7c5aba5c9ba11da99dfe 0.10.0.0 30 | eb901892b02af9efd5b8b46c77fe289240b6c69a 0.10.1.0 31 | e0171baa0d1f98d7b196a8bcbc9df889abc248bd 0.10.2.0 32 | beff76cd665adf846968b0248c6a1fc3daef005c 0.10.3.0 33 | 34ee28fbbc7fdd03739bae3094c6f4e511392dd6 0.10.3.1 34 | c1c80573883800f2c73ff5a31212d8c688a7d562 0.10.4.0 35 | d916be3f82405fa466005fa11c9bdc8c968cfff9 0.10.4.1 36 | 9711a2d79dfd882fdeb39479a4b2f4365dda448f 0.10.5.1 37 | 269f9457083ae21cdc785f623673b627361efe09 0.10.5.2 38 | d6fc5ae89aafbe335200f2c8d2ba2117322bd206 0.11.0.0 39 | 2f8fab6443bfb77c499852e6499df7ecd7f8c6ce 0.11.0.1 40 | f1452afc54f07559f5c85cb71db7a7e38d1456f1 0.11.0.2 41 | c75dd236003cf7eadab302d31f7523767407ea92 0.11.0.3 42 | 6de06c0e42a93bb602573fe15bab9fa32f82857a 0.12.0.0 43 | 6e37ee886a2d5dc8bf56e1d8e38b4cb08a845b90 0.13.1.0 44 | 2eed7bb5d817365a264ffc9b51f60e54d12cf328 0.13.1.1 45 | 74572dc8487579c11f69400b91863eb1a150e507 0.13.2.0 46 | e9452e9e69cedccbcbd13e407c38cca6286ac66f 0.13.2.2 47 | b4307c59867021ffc58f5afa62121f99b8d352ff 0.13.2.1 48 | 4302388f1acb398ec24f959175542be7addee36f 0.13.2.3 49 | ca30efd89890b8a6c871049111be08cab6eefc21 0.13.3.0 50 | 
5335fdac4baf7efefb8409743a5ba9d173ec8845 0.14.0.0 51 | 58427e7c3066951165dbefe0c4d976ce51689372 0.14.0.1 52 | d3e97bb448d7462b4e49532b7632a213cf874746 0.14.0.2 53 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009, 2010 Bryan O'Sullivan 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | # Statistics: efficient, general purpose statistics 2 | 3 | This package provides the Statistics module, a Haskell library for 4 | working with statistical data in a space- and time-efficient way. 5 | 6 | Where possible, we give citations and computational complexity 7 | estimates for the algorithms used. 8 | 9 | 10 | # Performance 11 | 12 | This library has been carefully optimised for high performance. To 13 | obtain the best runtime efficiency, it is imperative to compile 14 | libraries and applications that use this library using a high level of 15 | optimisation. 16 | 17 | 18 | # Get involved! 19 | 20 | Please report bugs via the 21 | [github issue tracker](https://github.com/haskell/statistics/issues). 22 | 23 | Master [git mirror](https://github.com/haskell/statistics): 24 | 25 | * `git clone git://github.com/haskell/statistics.git` 26 | 27 | # Authors 28 | 29 | This library is written and maintained by Bryan O'Sullivan, 30 | . 31 | -------------------------------------------------------------------------------- /Statistics/Autocorrelation.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | -- | 3 | -- Module : Statistics.Autocorrelation 4 | -- Copyright : (c) 2009 Bryan O'Sullivan 5 | -- License : BSD3 6 | -- 7 | -- Maintainer : bos@serpentine.com 8 | -- Stability : experimental 9 | -- Portability : portable 10 | -- 11 | -- Functions for computing autocovariance and autocorrelation of a 12 | -- sample. 
13 | 14 | module Statistics.Autocorrelation 15 | ( 16 | autocovariance 17 | , autocorrelation 18 | ) where 19 | 20 | import Prelude hiding (sum) 21 | import Statistics.Function (square) 22 | import Statistics.Sample (mean) 23 | import Statistics.Sample.Internal (sum) 24 | import qualified Data.Vector.Generic as G 25 | 26 | -- | Compute the autocovariance of a sample, i.e. the covariance of 27 | -- the sample against a shifted version of itself. 28 | autocovariance :: (G.Vector v Double, G.Vector v Int) => v Double -> v Double 29 | autocovariance a = G.map f . G.enumFromTo 0 $ l-2 30 | where 31 | f k = sum (G.zipWith (*) (G.take (l-k) c) (G.slice k (l-k) c)) 32 | / fromIntegral l 33 | c = G.map (subtract (mean a)) a 34 | l = G.length a 35 | 36 | -- | Compute the autocorrelation function of a sample, and the upper 37 | -- and lower bounds of confidence intervals for each element. 38 | -- 39 | -- /Note/: The calculation of the 95% confidence interval assumes a 40 | -- stationary Gaussian process. 41 | autocorrelation :: (G.Vector v Double, G.Vector v Int) => v Double -> (v Double, v Double, v Double) 42 | autocorrelation a = (r, ci (-), ci (+)) 43 | where 44 | r = G.map (/ G.head c) c 45 | where c = autocovariance a 46 | dllse = G.map f . G.scanl1 (+) . G.map square $ r 47 | where f v = 1.96 * sqrt ((v * 2 + 1) / l) 48 | l = fromIntegral (G.length a) 49 | ci f = G.cons 1 . G.tail . 
G.map (f (-1/l)) $ dllse 50 | -------------------------------------------------------------------------------- /Statistics/ConfidenceInt.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ViewPatterns #-} 2 | -- | Calculation of confidence intervals 3 | module Statistics.ConfidenceInt ( 4 | poissonCI 5 | , poissonNormalCI 6 | , binomialCI 7 | , naiveBinomialCI 8 | -- * References 9 | -- $references 10 | ) where 11 | 12 | import Statistics.Distribution 13 | import Statistics.Distribution.ChiSquared 14 | import Statistics.Distribution.Beta 15 | import Statistics.Types 16 | 17 | 18 | 19 | -- | Calculate confidence intervals for Poisson-distributed value 20 | -- using normal approximation 21 | poissonNormalCI :: Int -> Estimate NormalErr Double 22 | poissonNormalCI n 23 | | n < 0 = error "Statistics.ConfidenceInt.poissonNormalCI negative number of trials" 24 | | otherwise = estimateNormErr n' (sqrt n') 25 | where 26 | n' = fromIntegral n 27 | 28 | -- | Calculate confidence intervals for Poisson-distributed value for 29 | -- single measurement. These are exact confidence intervals 30 | poissonCI :: CL Double -> Int -> Estimate ConfInt Double 31 | poissonCI cl@(significanceLevel -> p) n 32 | | n < 0 = error "Statistics.ConfidenceInt.poissonCI: negative number of trials" 33 | | n == 0 = estimateFromInterval m (0 ,m2) cl 34 | | otherwise = estimateFromInterval m (m1,m2) cl 35 | where 36 | m = fromIntegral n 37 | m1 = 0.5 * quantile (chiSquared (2*n )) (p/2) 38 | m2 = 0.5 * complQuantile (chiSquared (2*n+2)) (p/2) 39 | 40 | -- | Calculate confidence interval using normal approximation. Note 41 | -- that this approximation breaks down when /p/ is either close to 0 42 | -- or to 1. In particular if @np < 5@ or @1 - np < 5@ this 43 | -- approximation shouldn't be used. 
44 | naiveBinomialCI :: Int -- ^ Number of trials 45 | -> Int -- ^ Number of successes 46 | -> Estimate NormalErr Double 47 | naiveBinomialCI n k 48 | | n <= 0 || k < 0 = error "Statistics.ConfidenceInt.naiveBinomialCI: negative number of events" 49 | | k > n = error "Statistics.ConfidenceInt.naiveBinomialCI: more successes than trials" 50 | | otherwise = estimateNormErr p σ 51 | where 52 | p = fromIntegral k / fromIntegral n 53 | σ = sqrt $ p * (1 - p) / fromIntegral n 54 | 55 | 56 | -- | Clopper-Pearson confidence interval also known as exact 57 | -- confidence intervals. 58 | binomialCI :: CL Double 59 | -> Int -- ^ Number of trials 60 | -> Int -- ^ Number of successes 61 | -> Estimate ConfInt Double 62 | binomialCI cl@(significanceLevel -> p) ni ki 63 | | ni <= 0 || ki < 0 = error "Statistics.ConfidenceInt.binomialCI: negative number of events" 64 | | ki > ni = error "Statistics.ConfidenceInt.binomialCI: more successes than trials" 65 | | ki == 0 = estimateFromInterval eff (0, ub) cl 66 | | ni == ki = estimateFromInterval eff (lb,1 ) cl 67 | | otherwise = estimateFromInterval eff (lb,ub) cl 68 | where 69 | k = fromIntegral ki 70 | n = fromIntegral ni 71 | eff = k / n 72 | lb = quantile (betaDistr k (n - k + 1)) (p/2) 73 | ub = complQuantile (betaDistr (k + 1) (n - k) ) (p/2) 74 | 75 | 76 | -- $references 77 | -- 78 | -- * Clopper, C.; Pearson, E. S. (1934). "The use of confidence or 79 | -- fiducial limits illustrated in the case of the 80 | -- binomial". Biometrika 26: 404–413. doi:10.1093/biomet/26.4.404 81 | -- 82 | -- * Brown, Lawrence D.; Cai, T. Tony; DasGupta, Anirban 83 | -- (2001). "Interval Estimation for a Binomial Proportion". Statistical 84 | -- Science 16 (2): 101–133. doi:10.1214/ss/1009213286. MR 1861069. 85 | -- Zbl 02068924. 
86 | -------------------------------------------------------------------------------- /Statistics/Correlation.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts #-} 2 | {-# LANGUAGE BangPatterns #-} 3 | -- | 4 | -- Module : Statistics.Correlation.Pearson 5 | -- 6 | module Statistics.Correlation 7 | ( -- * Pearson correlation 8 | pearson 9 | , pearson2 10 | , pearsonMatByRow 11 | -- * Spearman correlation 12 | , spearman 13 | , spearman2 14 | , spearmanMatByRow 15 | ) where 16 | 17 | import qualified Data.Vector.Generic as G 18 | import qualified Data.Vector.Unboxed as U 19 | import Statistics.Matrix 20 | import Statistics.Sample 21 | import Statistics.Test.Internal (rankUnsorted) 22 | 23 | 24 | ---------------------------------------------------------------- 25 | -- Pearson 26 | ---------------------------------------------------------------- 27 | 28 | -- | Pearson correlation for sample of pairs. Exactly same as 29 | -- 'Statistics.Sample.correlation' 30 | pearson :: (G.Vector v (Double, Double)) 31 | => v (Double, Double) -> Double 32 | pearson = correlation 33 | {-# INLINE pearson #-} 34 | 35 | -- | Pearson correlation for sample of pairs. 
Exactly same as 36 | -- 'Statistics.Sample.correlation' 37 | pearson2 :: (G.Vector v Double) 38 | => v Double -> v Double -> Double 39 | pearson2 = correlation2 40 | {-# INLINE pearson2 #-} 41 | 42 | -- | Compute pairwise Pearson correlation between rows of a matrix 43 | pearsonMatByRow :: Matrix -> Matrix 44 | pearsonMatByRow m 45 | = generateSym (rows m) 46 | (\i j -> pearson $ row m i `U.zip` row m j) 47 | {-# INLINE pearsonMatByRow #-} 48 | 49 | 50 | 51 | ---------------------------------------------------------------- 52 | -- Spearman 53 | ---------------------------------------------------------------- 54 | 55 | -- | Compute Spearman correlation between two samples 56 | spearman :: ( Ord a 57 | , Ord b 58 | , G.Vector v a 59 | , G.Vector v b 60 | , G.Vector v (a, b) 61 | , G.Vector v Int 62 | , G.Vector v (Int, a) 63 | , G.Vector v (Int, b) 64 | ) 65 | => v (a, b) 66 | -> Double 67 | spearman xy 68 | = pearson 69 | $ G.zip (rankUnsorted x) (rankUnsorted y) 70 | where 71 | (x, y) = G.unzip xy 72 | {-# INLINE spearman #-} 73 | 74 | -- | Compute Spearman correlation between two samples. Samples must 75 | -- have same length. 76 | spearman2 :: ( Ord a 77 | , Ord b 78 | , G.Vector v a 79 | , G.Vector v b 80 | , G.Vector v Int 81 | , G.Vector v (Int, a) 82 | , G.Vector v (Int, b) 83 | ) 84 | => v a 85 | -> v b 86 | -> Double 87 | spearman2 xs ys 88 | | nx /= ny = error "Statistics.Correlation.spearman2: samples must have same length" 89 | | otherwise = pearson $ G.zip (rankUnsorted xs) (rankUnsorted ys) 90 | where 91 | nx = G.length xs 92 | ny = G.length ys 93 | {-# INLINE spearman2 #-} 94 | 95 | -- | compute pairwise Spearman correlation between rows of a matrix 96 | spearmanMatByRow :: Matrix -> Matrix 97 | spearmanMatByRow 98 | = pearsonMatByRow . fromRows . fmap rankUnsorted . 
toRows 99 | {-# INLINE spearmanMatByRow #-} 100 | -------------------------------------------------------------------------------- /Statistics/Correlation/Kendall.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, FlexibleContexts #-} 2 | -- | 3 | -- Module : Statistics.Correlation.Kendall 4 | -- 5 | -- Fast O(NlogN) implementation of 6 | -- <https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient Kendall's tau>. 7 | -- 8 | -- This module implements Kendall's tau form b which allows ties in the data. 9 | -- This is the same formula used by other statistical packages, e.g., R, matlab. 10 | -- 11 | -- > \tau = \frac{n_c - n_d}{\sqrt{(n_0 - n_1)(n_0 - n_2)}} 12 | -- 13 | -- where n_0 = n(n-1)\/2, n_1 = number of pairs tied for the first quantity, 14 | -- n_2 = number of pairs tied for the second quantity, 15 | -- n_c = number of concordant pairs, n_d = number of discordant pairs. 16 | 17 | module Statistics.Correlation.Kendall 18 | ( kendall 19 | 20 | -- * References 21 | -- $references 22 | ) where 23 | 24 | import Control.Monad.ST (ST, runST) 25 | import Data.Bits (shiftR) 26 | import Data.Function (on) 27 | import Data.STRef 28 | import qualified Data.Vector.Algorithms.Intro as I 29 | import qualified Data.Vector.Generic as G 30 | import qualified Data.Vector.Generic.Mutable as GM 31 | 32 | -- | /O(nlogn)/ Compute the Kendall's tau from a vector of paired data. 33 | -- Return NaN when number of pairs <= 1. 
34 | kendall :: (Ord a, Ord b, G.Vector v (a, b)) => v (a, b) -> Double 35 | kendall xy' 36 | | G.length xy' <= 1 = 0/0 37 | | otherwise = runST $ do 38 | xy <- G.thaw xy' 39 | let n = GM.length xy 40 | n_dRef <- newSTRef 0 41 | I.sort xy 42 | tieX <- numOfTiesBy ((==) `on` fst) xy 43 | tieXY <- numOfTiesBy (==) xy 44 | tmp <- GM.new n 45 | mergeSort (compare `on` snd) xy tmp n_dRef 46 | tieY <- numOfTiesBy ((==) `on` snd) xy 47 | n_d <- readSTRef n_dRef 48 | let n_0 = (fromIntegral n * (fromIntegral n-1)) `shiftR` 1 :: Integer 49 | n_c = n_0 - n_d - tieX - tieY + tieXY 50 | return $ fromIntegral (n_c - n_d) / 51 | (sqrt.fromIntegral) ((n_0 - tieX) * (n_0 - tieY)) 52 | {-# INLINE kendall #-} 53 | 54 | -- calculate number of tied pairs in a sorted vector 55 | numOfTiesBy :: GM.MVector v a 56 | => (a -> a -> Bool) -> v s a -> ST s Integer 57 | numOfTiesBy f xs = do count <- newSTRef (0::Integer) 58 | loop count (1::Int) (0::Int) 59 | readSTRef count 60 | where 61 | n = GM.length xs 62 | loop c !acc !i | i >= n - 1 = modifySTRef' c (+ g acc) 63 | | otherwise = do 64 | x1 <- GM.unsafeRead xs i 65 | x2 <- GM.unsafeRead xs (i+1) 66 | if f x1 x2 67 | then loop c (acc+1) (i+1) 68 | else modifySTRef' c (+ g acc) >> loop c 1 (i+1) 69 | g x = fromIntegral ((x * (x - 1)) `shiftR` 1) 70 | {-# INLINE numOfTiesBy #-} 71 | 72 | -- Implementation of Knight's merge sort (adapted from vector-algorithm). This 73 | -- function is used to count the number of discordant pairs. 
74 | mergeSort :: GM.MVector v e 75 | => (e -> e -> Ordering) 76 | -> v s e 77 | -> v s e 78 | -> STRef s Integer 79 | -> ST s () 80 | mergeSort cmp src buf count = loop 0 (GM.length src - 1) 81 | where 82 | loop l u 83 | | u == l = return () 84 | | u - l == 1 = do 85 | eL <- GM.unsafeRead src l 86 | eU <- GM.unsafeRead src u 87 | case cmp eL eU of 88 | GT -> do GM.unsafeWrite src l eU 89 | GM.unsafeWrite src u eL 90 | modifySTRef' count (+1) 91 | _ -> return () 92 | | otherwise = do 93 | let mid = (u + l) `shiftR` 1 94 | loop l mid 95 | loop mid u 96 | merge cmp (GM.unsafeSlice l (u-l+1) src) buf (mid - l) count 97 | {-# INLINE mergeSort #-} 98 | 99 | merge :: GM.MVector v e 100 | => (e -> e -> Ordering) 101 | -> v s e 102 | -> v s e 103 | -> Int 104 | -> STRef s Integer 105 | -> ST s () 106 | merge cmp src buf mid count = do GM.unsafeCopy tmp lower 107 | eTmp <- GM.unsafeRead tmp 0 108 | eUpp <- GM.unsafeRead upper 0 109 | loop tmp 0 eTmp upper 0 eUpp 0 110 | where 111 | lower = GM.unsafeSlice 0 mid src 112 | upper = GM.unsafeSlice mid (GM.length src - mid) src 113 | tmp = GM.unsafeSlice 0 mid buf 114 | wroteHigh low iLow eLow high iHigh iIns 115 | | iHigh >= GM.length high = 116 | GM.unsafeCopy (GM.unsafeSlice iIns (GM.length low - iLow) src) 117 | (GM.unsafeSlice iLow (GM.length low - iLow) low) 118 | | otherwise = do eHigh <- GM.unsafeRead high iHigh 119 | loop low iLow eLow high iHigh eHigh iIns 120 | 121 | wroteLow low iLow high iHigh eHigh iIns 122 | | iLow >= GM.length low = return () 123 | | otherwise = do eLow <- GM.unsafeRead low iLow 124 | loop low iLow eLow high iHigh eHigh iIns 125 | 126 | loop !low !iLow !eLow !high !iHigh !eHigh !iIns = case cmp eHigh eLow of 127 | LT -> do GM.unsafeWrite src iIns eHigh 128 | modifySTRef' count (+ fromIntegral (GM.length low - iLow)) 129 | wroteHigh low iLow eLow high (iHigh+1) (iIns+1) 130 | _ -> do GM.unsafeWrite src iIns eLow 131 | wroteLow low (iLow+1) high iHigh eHigh (iIns+1) 132 | {-# INLINE merge #-} 133 | 
134 | -- $references 135 | -- 136 | -- * William R. Knight. (1966) A computer method for calculating Kendall's Tau 137 | -- with ungrouped data. /Journal of the American Statistical Association/, 138 | -- Vol. 61, No. 314, Part 1, pp. 436-439. 139 | -- 140 | -------------------------------------------------------------------------------- /Statistics/Distribution/Beta.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | ----------------------------------------------------------------------------- 4 | -- | 5 | -- Module : Statistics.Distribution.Beta 6 | -- Copyright : (C) 2012 Edward Kmett, 7 | -- License : BSD-style (see the file LICENSE) 8 | -- 9 | -- Maintainer : Edward Kmett 10 | -- Stability : provisional 11 | -- Portability : DeriveDataTypeable 12 | -- 13 | ---------------------------------------------------------------------------- 14 | module Statistics.Distribution.Beta 15 | ( BetaDistribution 16 | -- * Constructor 17 | , betaDistr 18 | , betaDistrE 19 | , improperBetaDistr 20 | , improperBetaDistrE 21 | -- * Accessors 22 | , bdAlpha 23 | , bdBeta 24 | ) where 25 | 26 | import Control.Applicative 27 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 28 | import Data.Binary (Binary(..)) 29 | import Data.Data (Data, Typeable) 30 | import GHC.Generics (Generic) 31 | import Numeric.SpecFunctions ( 32 | incompleteBeta, invIncompleteBeta, logBeta, digamma, log1p) 33 | import Numeric.MathFunctions.Constants (m_NaN,m_neg_inf) 34 | import qualified Statistics.Distribution as D 35 | import Statistics.Internal 36 | 37 | 38 | -- | The beta distribution 39 | data BetaDistribution = BD 40 | { bdAlpha :: {-# UNPACK #-} !Double 41 | -- ^ Alpha shape parameter 42 | , bdBeta :: {-# UNPACK #-} !Double 43 | -- ^ Beta shape parameter 44 | } deriving (Eq, Typeable, Data, Generic) 45 | 46 | instance Show BetaDistribution where 47 | showsPrec n 
(BD a b) = defaultShow2 "improperBetaDistr" a b n 48 | instance Read BetaDistribution where 49 | readPrec = defaultReadPrecM2 "improperBetaDistr" improperBetaDistrE 50 | 51 | instance ToJSON BetaDistribution 52 | instance FromJSON BetaDistribution where 53 | parseJSON (Object v) = do 54 | a <- v .: "bdAlpha" 55 | b <- v .: "bdBeta" 56 | maybe (fail $ errMsgI a b) return $ improperBetaDistrE a b 57 | parseJSON _ = empty 58 | 59 | instance Binary BetaDistribution where 60 | put (BD a b) = put a >> put b 61 | get = do 62 | a <- get 63 | b <- get 64 | maybe (fail $ errMsgI a b) return $ improperBetaDistrE a b 65 | 66 | 67 | -- | Create beta distribution. Both shape parameters must be positive. 68 | betaDistr :: Double -- ^ Shape parameter alpha 69 | -> Double -- ^ Shape parameter beta 70 | -> BetaDistribution 71 | betaDistr a b = maybe (error $ errMsg a b) id $ betaDistrE a b 72 | 73 | -- | Create beta distribution. Both shape parameters must be positive. 74 | betaDistrE :: Double -- ^ Shape parameter alpha 75 | -> Double -- ^ Shape parameter beta 76 | -> Maybe BetaDistribution 77 | betaDistrE a b 78 | | a > 0 && b > 0 = Just (BD a b) 79 | | otherwise = Nothing 80 | 81 | errMsg :: Double -> Double -> String 82 | errMsg a b = "Statistics.Distribution.Beta.betaDistr: " 83 | ++ "shape parameters must be positive. Got a = " 84 | ++ show a 85 | ++ " b = " 86 | ++ show b 87 | 88 | 89 | -- | Create beta distribution. Both shape parameters must be 90 | -- non-negative. So it allows to construct improper beta distribution 91 | -- which could be used as improper prior. 92 | improperBetaDistr :: Double -- ^ Shape parameter alpha 93 | -> Double -- ^ Shape parameter beta 94 | -> BetaDistribution 95 | improperBetaDistr a b 96 | = maybe (error $ errMsgI a b) id $ improperBetaDistrE a b 97 | 98 | -- | Create beta distribution. Both shape parameters must be 99 | -- non-negative. So it allows to construct improper beta distribution 100 | -- which could be used as improper prior. 
101 | improperBetaDistrE :: Double -- ^ Shape parameter alpha 102 | -> Double -- ^ Shape parameter beta 103 | -> Maybe BetaDistribution 104 | improperBetaDistrE a b 105 | | a >= 0 && b >= 0 = Just (BD a b) 106 | | otherwise = Nothing 107 | 108 | errMsgI :: Double -> Double -> String 109 | errMsgI a b 110 | = "Statistics.Distribution.Beta.betaDistr: " 111 | ++ "shape parameters must be non-negative. Got a = " ++ show a 112 | ++ " b = " ++ show b 113 | 114 | 115 | 116 | instance D.Distribution BetaDistribution where 117 | cumulative (BD a b) x 118 | | x <= 0 = 0 119 | | x >= 1 = 1 120 | | otherwise = incompleteBeta a b x 121 | complCumulative (BD a b) x 122 | | x <= 0 = 1 123 | | x >= 1 = 0 124 | -- For small x we use direct computation to avoid precision loss 125 | -- when computing (1-x) 126 | | x < 0.5 = 1 - incompleteBeta a b x 127 | -- Otherwise we use property of incomplete beta: 128 | -- > I(x,a,b) = 1 - I(1-x,b,a) 129 | | otherwise = incompleteBeta b a (1-x) 130 | 131 | instance D.Mean BetaDistribution where 132 | mean (BD a b) = a / (a + b) 133 | 134 | instance D.MaybeMean BetaDistribution where 135 | maybeMean = Just . D.mean 136 | 137 | instance D.Variance BetaDistribution where 138 | variance (BD a b) = a*b / (apb*apb*(apb+1)) 139 | where apb = a + b 140 | 141 | instance D.MaybeVariance BetaDistribution where 142 | maybeVariance = Just . D.variance 143 | 144 | instance D.Entropy BetaDistribution where 145 | entropy (BD a b) = 146 | logBeta a b 147 | - (a-1) * digamma a 148 | - (b-1) * digamma b 149 | + (a+b-2) * digamma (a+b) 150 | 151 | instance D.MaybeEntropy BetaDistribution where 152 | maybeEntropy = Just . 
D.entropy 153 | 154 | instance D.ContDistr BetaDistribution where 155 | density (BD a b) x 156 | | a <= 0 || b <= 0 = m_NaN 157 | | x <= 0 = 0 158 | | x >= 1 = 0 159 | | otherwise = exp $ (a-1)*log x + (b-1) * log1p (-x) - logBeta a b 160 | logDensity (BD a b) x 161 | | a <= 0 || b <= 0 = m_NaN 162 | | x <= 0 = m_neg_inf 163 | | x >= 1 = m_neg_inf 164 | | otherwise = (a-1)*log x + (b-1)*log1p (-x) - logBeta a b 165 | 166 | quantile (BD a b) p 167 | | p == 0 = 0 168 | | p == 1 = 1 169 | | p > 0 && p < 1 = invIncompleteBeta a b p 170 | | otherwise = 171 | error $ "Statistics.Distribution.Beta.quantile: p must be in [0,1] range. Got: "++show p 172 | 173 | instance D.ContGen BetaDistribution where 174 | genContVar = D.genContinuous 175 | -------------------------------------------------------------------------------- /Statistics/Distribution/Binomial.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE PatternGuards #-} 3 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 4 | -- | 5 | -- Module : Statistics.Distribution.Binomial 6 | -- Copyright : (c) 2009 Bryan O'Sullivan 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : bos@serpentine.com 10 | -- Stability : experimental 11 | -- Portability : portable 12 | -- 13 | -- The binomial distribution. This is the discrete probability 14 | -- distribution of the number of successes in a sequence of /n/ 15 | -- independent yes\/no experiments, each of which yields success with 16 | -- probability /p/. 
17 | 18 | module Statistics.Distribution.Binomial 19 | ( 20 | BinomialDistribution 21 | -- * Constructors 22 | , binomial 23 | , binomialE 24 | -- * Accessors 25 | , bdTrials 26 | , bdProbability 27 | ) where 28 | 29 | import Control.Applicative 30 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 31 | import Data.Binary (Binary(..)) 32 | import Data.Data (Data, Typeable) 33 | import GHC.Generics (Generic) 34 | import Numeric.SpecFunctions (choose,logChoose,incompleteBeta,log1p) 35 | import Numeric.MathFunctions.Constants (m_epsilon,m_tiny) 36 | 37 | import qualified Statistics.Distribution as D 38 | import qualified Statistics.Distribution.Poisson.Internal as I 39 | import Statistics.Internal 40 | 41 | 42 | -- | The binomial distribution. 43 | data BinomialDistribution = BD { 44 | bdTrials :: {-# UNPACK #-} !Int 45 | -- ^ Number of trials. 46 | , bdProbability :: {-# UNPACK #-} !Double 47 | -- ^ Probability. 48 | } deriving (Eq, Typeable, Data, Generic) 49 | 50 | instance Show BinomialDistribution where 51 | showsPrec i (BD n p) = defaultShow2 "binomial" n p i 52 | instance Read BinomialDistribution where 53 | readPrec = defaultReadPrecM2 "binomial" binomialE 54 | 55 | instance ToJSON BinomialDistribution 56 | instance FromJSON BinomialDistribution where 57 | parseJSON (Object v) = do 58 | n <- v .: "bdTrials" 59 | p <- v .: "bdProbability" 60 | maybe (fail $ errMsg n p) return $ binomialE n p 61 | parseJSON _ = empty 62 | 63 | instance Binary BinomialDistribution where 64 | put (BD x y) = put x >> put y 65 | get = do 66 | n <- get 67 | p <- get 68 | maybe (fail $ errMsg n p) return $ binomialE n p 69 | 70 | 71 | 72 | instance D.Distribution BinomialDistribution where 73 | cumulative = cumulative 74 | complCumulative = complCumulative 75 | 76 | instance D.DiscreteDistr BinomialDistribution where 77 | probability = probability 78 | logProbability = logProbability 79 | 80 | instance D.Mean BinomialDistribution where 81 | mean = mean 82 | 83 | instance 
D.Variance BinomialDistribution where 84 | variance = variance 85 | 86 | instance D.MaybeMean BinomialDistribution where 87 | maybeMean = Just . D.mean 88 | 89 | instance D.MaybeVariance BinomialDistribution where 90 | maybeStdDev = Just . D.stdDev 91 | maybeVariance = Just . D.variance 92 | 93 | instance D.Entropy BinomialDistribution where 94 | entropy (BD n p) 95 | | n == 0 = 0 96 | | n <= 100 = directEntropy (BD n p) 97 | | otherwise = I.poissonEntropy (fromIntegral n * p) 98 | 99 | instance D.MaybeEntropy BinomialDistribution where 100 | maybeEntropy = Just . D.entropy 101 | 102 | -- This could be slow for big n 103 | probability :: BinomialDistribution -> Int -> Double 104 | probability (BD n p) k 105 | | k < 0 || k > n = 0 106 | | n == 0 = 1 107 | -- choose could overflow Double for n >= 1030 so we switch to 108 | -- log-domain to calculate probability 109 | -- 110 | -- We also want to avoid underflow when computing p^k & 111 | -- (1-p)^(n-k). 112 | | n < 1000 113 | , pK >= m_tiny 114 | , pNK >= m_tiny = choose n k * pK * pNK 115 | | otherwise = exp $ logChoose n k + log p * k' + log1p (-p) * nk' 116 | where 117 | pK = p^k 118 | pNK = (1-p)^(n-k) 119 | k' = fromIntegral k 120 | nk' = fromIntegral $ n - k 121 | 122 | logProbability :: BinomialDistribution -> Int -> Double 123 | logProbability (BD n p) k 124 | | k < 0 || k > n = (-1)/0 125 | | n == 0 = 0 126 | | otherwise = logChoose n k + log p * k' + log1p (-p) * nk' 127 | where 128 | k' = fromIntegral k 129 | nk' = fromIntegral $ n - k 130 | 131 | cumulative :: BinomialDistribution -> Double -> Double 132 | cumulative (BD n p) x 133 | | isNaN x = error "Statistics.Distribution.Binomial.cumulative: NaN input" 134 | | isInfinite x = if x > 0 then 1 else 0 135 | | k < 0 = 0 136 | | k >= n = 1 137 | | otherwise = incompleteBeta (fromIntegral (n-k)) (fromIntegral (k+1)) (1 - p) 138 | where 139 | k = floor x 140 | 141 | complCumulative :: BinomialDistribution -> Double -> Double 142 | complCumulative (BD n p) x 
143 | | isNaN x = error "Statistics.Distribution.Binomial.complCumulative: NaN input" 144 | | isInfinite x = if x > 0 then 0 else 1 145 | | k < 0 = 1 146 | | k >= n = 0 147 | | otherwise = incompleteBeta (fromIntegral (k+1)) (fromIntegral (n-k)) p 148 | where 149 | k = floor x 150 | 151 | mean :: BinomialDistribution -> Double 152 | mean (BD n p) = fromIntegral n * p 153 | 154 | variance :: BinomialDistribution -> Double 155 | variance (BD n p) = fromIntegral n * p * (1 - p) 156 | 157 | directEntropy :: BinomialDistribution -> Double 158 | directEntropy d@(BD n _) = 159 | negate . sum $ 160 | takeWhile (< negate m_epsilon) $ 161 | dropWhile (not . (< negate m_epsilon)) $ 162 | [ let x = probability d k in x * log x | k <- [0..n]] 163 | 164 | -- | Construct binomial distribution. Number of trials must be 165 | -- non-negative and probability must be in [0,1] range 166 | binomial :: Int -- ^ Number of trials. 167 | -> Double -- ^ Probability. 168 | -> BinomialDistribution 169 | binomial n p = maybe (error $ errMsg n p) id $ binomialE n p 170 | 171 | -- | Construct binomial distribution. Number of trials must be 172 | -- non-negative and probability must be in [0,1] range 173 | binomialE :: Int -- ^ Number of trials. 174 | -> Double -- ^ Probability. 
175 | -> Maybe BinomialDistribution 176 | binomialE n p 177 | | n < 0 = Nothing 178 | | p >= 0 && p <= 1 = Just (BD n p) 179 | | otherwise = Nothing 180 | 181 | errMsg :: Int -> Double -> String 182 | errMsg n p 183 | = "Statistics.Distribution.Binomial.binomial: n=" ++ show n 184 | ++ " p=" ++ show p ++ "but n>=0 and p in [0,1]" 185 | -------------------------------------------------------------------------------- /Statistics/Distribution/CauchyLorentz.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | -- | 4 | -- Module : Statistics.Distribution.CauchyLorentz 5 | -- Copyright : (c) 2011 Aleksey Khudyakov 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- The Cauchy-Lorentz distribution. It's also known as Lorentz 13 | -- distribution or Breit–Wigner distribution. 14 | -- 15 | -- It doesn't have mean and variance. 16 | module Statistics.Distribution.CauchyLorentz ( 17 | CauchyDistribution 18 | , cauchyDistribMedian 19 | , cauchyDistribScale 20 | -- * Constructors 21 | , cauchyDistribution 22 | , cauchyDistributionE 23 | , standardCauchy 24 | ) where 25 | 26 | import Control.Applicative 27 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 28 | import Data.Binary (Binary(..)) 29 | import Data.Maybe (fromMaybe) 30 | import Data.Data (Data, Typeable) 31 | import GHC.Generics (Generic) 32 | import qualified Statistics.Distribution as D 33 | import Statistics.Internal 34 | 35 | -- | Cauchy-Lorentz distribution. 36 | data CauchyDistribution = CD { 37 | -- | Central value of Cauchy-Lorentz distribution which is its 38 | -- mode and median. Distribution doesn't have mean so function 39 | -- is named after median. 40 | cauchyDistribMedian :: {-# UNPACK #-} !Double 41 | -- | Scale parameter of Cauchy-Lorentz distribution. 
It's 42 | -- different from variance and specify half width at half 43 | -- maximum (HWHM). 44 | , cauchyDistribScale :: {-# UNPACK #-} !Double 45 | } 46 | deriving (Eq, Typeable, Data, Generic) 47 | 48 | instance Show CauchyDistribution where 49 | showsPrec i (CD m s) = defaultShow2 "cauchyDistribution" m s i 50 | instance Read CauchyDistribution where 51 | readPrec = defaultReadPrecM2 "cauchyDistribution" cauchyDistributionE 52 | 53 | instance ToJSON CauchyDistribution 54 | instance FromJSON CauchyDistribution where 55 | parseJSON (Object v) = do 56 | m <- v .: "cauchyDistribMedian" 57 | s <- v .: "cauchyDistribScale" 58 | maybe (fail $ errMsg m s) return $ cauchyDistributionE m s 59 | parseJSON _ = empty 60 | 61 | instance Binary CauchyDistribution where 62 | put (CD m s) = put m >> put s 63 | get = do 64 | m <- get 65 | s <- get 66 | maybe (error $ errMsg m s) return $ cauchyDistributionE m s 67 | 68 | 69 | -- | Cauchy distribution 70 | cauchyDistribution :: Double -- ^ Central point 71 | -> Double -- ^ Scale parameter (FWHM) 72 | -> CauchyDistribution 73 | cauchyDistribution m s 74 | = fromMaybe (error $ errMsg m s) 75 | $ cauchyDistributionE m s 76 | 77 | 78 | -- | Cauchy distribution 79 | cauchyDistributionE :: Double -- ^ Central point 80 | -> Double -- ^ Scale parameter (FWHM) 81 | -> Maybe CauchyDistribution 82 | cauchyDistributionE m s 83 | | s > 0 = Just (CD m s) 84 | | otherwise = Nothing 85 | 86 | errMsg :: Double -> Double -> String 87 | errMsg _ s 88 | = "Statistics.Distribution.CauchyLorentz.cauchyDistribution: FWHM must be positive. Got " 89 | ++ show s 90 | 91 | -- | Standard Cauchy distribution. 
It is centered at 0 and has scale parameter (HWHM) 1
D.entropy 143 | -------------------------------------------------------------------------------- /Statistics/Distribution/ChiSquared.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | -- | 4 | -- Module : Statistics.Distribution.ChiSquared 5 | -- Copyright : (c) 2010 Alexey Khudyakov 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- The chi-squared distribution. This is a continuous probability 13 | -- distribution of sum of squares of k independent standard normal 14 | -- distributions. It's commonly used in statistical tests 15 | module Statistics.Distribution.ChiSquared ( 16 | ChiSquared 17 | , chiSquaredNDF 18 | -- * Constructors 19 | , chiSquared 20 | , chiSquaredE 21 | ) where 22 | 23 | import Control.Applicative 24 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 25 | import Data.Binary (Binary(..)) 26 | import Data.Data (Data, Typeable) 27 | import GHC.Generics (Generic) 28 | import Numeric.SpecFunctions ( incompleteGamma,invIncompleteGamma,logGamma,digamma) 29 | import Numeric.MathFunctions.Constants (m_neg_inf) 30 | import qualified System.Random.MWC.Distributions as MWC 31 | 32 | import qualified Statistics.Distribution as D 33 | import Statistics.Internal 34 | 35 | 36 | 37 | -- | Chi-squared distribution 38 | newtype ChiSquared = ChiSquared 39 | { chiSquaredNDF :: Int 40 | -- ^ Get number of degrees of freedom 41 | } 42 | deriving (Eq, Typeable, Data, Generic) 43 | 44 | instance Show ChiSquared where 45 | showsPrec i (ChiSquared n) = defaultShow1 "chiSquared" n i 46 | instance Read ChiSquared where 47 | readPrec = defaultReadPrecM1 "chiSquared" chiSquaredE 48 | 49 | instance ToJSON ChiSquared 50 | instance FromJSON ChiSquared where 51 | parseJSON (Object v) = do 52 | n <- v .: "chiSquaredNDF" 53 | maybe (fail $ errMsg 
n) return $ chiSquaredE n 54 | parseJSON _ = empty 55 | 56 | instance Binary ChiSquared where 57 | put (ChiSquared x) = put x 58 | get = do n <- get 59 | maybe (fail $ errMsg n) return $ chiSquaredE n 60 | 61 | 62 | -- | Construct chi-squared distribution. Number of degrees of freedom 63 | -- must be positive. 64 | chiSquared :: Int -> ChiSquared 65 | chiSquared n = maybe (error $ errMsg n) id $ chiSquaredE n 66 | 67 | -- | Construct chi-squared distribution. Number of degrees of freedom 68 | -- must be positive. 69 | chiSquaredE :: Int -> Maybe ChiSquared 70 | chiSquaredE n 71 | | n <= 0 = Nothing 72 | | otherwise = Just (ChiSquared n) 73 | 74 | errMsg :: Int -> String 75 | errMsg n = "Statistics.Distribution.ChiSquared.chiSquared: N.D.F. must be positive. Got " ++ show n 76 | 77 | instance D.Distribution ChiSquared where 78 | cumulative = cumulative 79 | 80 | instance D.ContDistr ChiSquared where 81 | density chi x 82 | | x <= 0 = 0 83 | | otherwise = exp $ log x * (ndf2 - 1) - x2 - logGamma ndf2 - log 2 * ndf2 84 | where 85 | ndf = fromIntegral $ chiSquaredNDF chi 86 | ndf2 = ndf/2 87 | x2 = x/2 88 | 89 | logDensity chi x 90 | | x <= 0 = m_neg_inf 91 | | otherwise = log x * (ndf2 - 1) - x2 - logGamma ndf2 - log 2 * ndf2 92 | where 93 | ndf = fromIntegral $ chiSquaredNDF chi 94 | ndf2 = ndf/2 95 | x2 = x/2 96 | 97 | quantile = quantile 98 | 99 | instance D.Mean ChiSquared where 100 | mean (ChiSquared ndf) = fromIntegral ndf 101 | 102 | instance D.Variance ChiSquared where 103 | variance (ChiSquared ndf) = fromIntegral (2*ndf) 104 | 105 | instance D.MaybeMean ChiSquared where 106 | maybeMean = Just . D.mean 107 | 108 | instance D.MaybeVariance ChiSquared where 109 | maybeStdDev = Just . D.stdDev 110 | maybeVariance = Just . 
D.variance 111 | 112 | instance D.Entropy ChiSquared where 113 | entropy (ChiSquared ndf) = 114 | let kHalf = 0.5 * fromIntegral ndf in 115 | kHalf 116 | + log 2 117 | + logGamma kHalf 118 | + (1-kHalf) * digamma kHalf 119 | 120 | instance D.MaybeEntropy ChiSquared where 121 | maybeEntropy = Just . D.entropy 122 | 123 | instance D.ContGen ChiSquared where 124 | genContVar (ChiSquared n) = MWC.chiSquare n 125 | 126 | 127 | cumulative :: ChiSquared -> Double -> Double 128 | cumulative chi x 129 | | x <= 0 = 0 130 | | otherwise = incompleteGamma (ndf/2) (x/2) 131 | where 132 | ndf = fromIntegral $ chiSquaredNDF chi 133 | 134 | quantile :: ChiSquared -> Double -> Double 135 | quantile (ChiSquared ndf) p 136 | | p == 0 = 0 137 | | p == 1 = 1/0 138 | | p > 0 && p < 1 = 2 * invIncompleteGamma (fromIntegral ndf / 2) p 139 | | otherwise = 140 | error $ "Statistics.Distribution.ChiSquared.quantile: p must be in [0,1] range. Got: "++show p 141 | -------------------------------------------------------------------------------- /Statistics/Distribution/DiscreteUniform.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric, OverloadedStrings #-} 2 | -- | 3 | -- Module : Statistics.Distribution.DiscreteUniform 4 | -- Copyright : (c) 2016 André Szabolcs Szelp 5 | -- License : BSD3 6 | -- 7 | -- Maintainer : a.sz.szelp@gmail.com 8 | -- Stability : experimental 9 | -- Portability : portable 10 | -- 11 | -- The discrete uniform distribution. There are two parametrizations of 12 | -- this distribution. First is the probability distribution on an 13 | -- inclusive interval {1, ..., n}. This is parametrized with n only, 14 | -- where p_1, ..., p_n = 1/n. ('discreteUniform'). 15 | -- 16 | -- The second parametrization is the uniform distribution on {a, ..., b} with 17 | -- probabilities p_a, ..., p_b = 1/(a-b+1). This is parametrized with 18 | -- /a/ and /b/. 
('discreteUniformAB') 19 | 20 | module Statistics.Distribution.DiscreteUniform 21 | ( 22 | DiscreteUniform 23 | -- * Constructors 24 | , discreteUniform 25 | , discreteUniformAB 26 | -- * Accessors 27 | , rangeFrom 28 | , rangeTo 29 | ) where 30 | 31 | import Control.Applicative (empty) 32 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 33 | import Data.Binary (Binary(..)) 34 | import Data.Data (Data, Typeable) 35 | import System.Random.Stateful (uniformRM) 36 | import GHC.Generics (Generic) 37 | 38 | import qualified Statistics.Distribution as D 39 | import Statistics.Internal 40 | 41 | 42 | 43 | -- | The discrete uniform distribution. 44 | data DiscreteUniform = U { 45 | rangeFrom :: {-# UNPACK #-} !Int 46 | -- ^ /a/, the lower bound of the support {a, ..., b} 47 | , rangeTo :: {-# UNPACK #-} !Int 48 | -- ^ /b/, the upper bound of the support {a, ..., b} 49 | } deriving (Eq, Typeable, Data, Generic) 50 | 51 | instance Show DiscreteUniform where 52 | showsPrec i (U a b) = defaultShow2 "discreteUniformAB" a b i 53 | instance Read DiscreteUniform where 54 | readPrec = defaultReadPrecM2 "discreteUniformAB" (\a b -> Just (discreteUniformAB a b)) 55 | 56 | instance ToJSON DiscreteUniform 57 | instance FromJSON DiscreteUniform where 58 | parseJSON (Object v) = do 59 | a <- v .: "uniformA" 60 | b <- v .: "uniformB" 61 | return $ discreteUniformAB a b 62 | parseJSON _ = empty 63 | 64 | instance Binary DiscreteUniform where 65 | put (U a b) = put a >> put b 66 | get = discreteUniformAB <$> get <*> get 67 | 68 | instance D.Distribution DiscreteUniform where 69 | cumulative (U a b) x 70 | | x < fromIntegral a = 0 71 | | x > fromIntegral b = 1 72 | | otherwise = fromIntegral (floor x - a + 1) / fromIntegral (b - a + 1) 73 | 74 | instance D.DiscreteDistr DiscreteUniform where 75 | probability (U a b) k 76 | | k >= a && k <= b = 1 / fromIntegral (b - a + 1) 77 | | otherwise = 0 78 | 79 | instance D.Mean DiscreteUniform where 80 | mean (U a b) = fromIntegral (a+b)/2 
81 | 82 | instance D.Variance DiscreteUniform where 83 | variance (U a b) = (fromIntegral (b - a + 1)^(2::Int) - 1) / 12 84 | 85 | instance D.MaybeMean DiscreteUniform where 86 | maybeMean = Just . D.mean 87 | 88 | instance D.MaybeVariance DiscreteUniform where 89 | maybeStdDev = Just . D.stdDev 90 | maybeVariance = Just . D.variance 91 | 92 | instance D.Entropy DiscreteUniform where 93 | entropy (U a b) = log $ fromIntegral $ b - a + 1 94 | 95 | instance D.MaybeEntropy DiscreteUniform where 96 | maybeEntropy = Just . D.entropy 97 | 98 | instance D.ContGen DiscreteUniform where 99 | genContVar d = fmap fromIntegral . D.genDiscreteVar d 100 | 101 | instance D.DiscreteGen DiscreteUniform where 102 | genDiscreteVar (U a b) = uniformRM (a,b) 103 | 104 | -- | Construct discrete uniform distribution on support {1, ..., n}. 105 | -- Range /n/ must be >0. 106 | discreteUniform :: Int -- ^ Range 107 | -> DiscreteUniform 108 | discreteUniform n 109 | | n < 1 = error $ msg ++ "range must be > 0. Got " ++ show n 110 | | otherwise = U 1 n 111 | where msg = "Statistics.Distribution.DiscreteUniform.discreteUniform: " 112 | 113 | -- | Construct discrete uniform distribution on support {a, ..., b}. 
114 | discreteUniformAB :: Int -- ^ Lower boundary (inclusive) 115 | -> Int -- ^ Upper boundary (inclusive) 116 | -> DiscreteUniform 117 | discreteUniformAB a b 118 | | b < a = U b a 119 | | otherwise = U a b 120 | -------------------------------------------------------------------------------- /Statistics/Distribution/Exponential.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE MultiParamTypeClasses #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 4 | -- | 5 | -- Module : Statistics.Distribution.Exponential 6 | -- Copyright : (c) 2009 Bryan O'Sullivan 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : bos@serpentine.com 10 | -- Stability : experimental 11 | -- Portability : portable 12 | -- 13 | -- The exponential distribution. This is the continuous probability 14 | -- distribution of the times between events in a Poisson process, in 15 | -- which events occur continuously and independently at a constant 16 | -- average rate. 
17 | 18 | module Statistics.Distribution.Exponential 19 | ( 20 | ExponentialDistribution 21 | -- * Constructors 22 | , exponential 23 | , exponentialE 24 | -- * Accessors 25 | , edLambda 26 | ) where 27 | 28 | import Control.Applicative 29 | import Data.Aeson (FromJSON(..),ToJSON,Value(..),(.:)) 30 | import Data.Binary (Binary, put, get) 31 | import Data.Data (Data, Typeable) 32 | import GHC.Generics (Generic) 33 | import Numeric.SpecFunctions (log1p,expm1) 34 | import Numeric.MathFunctions.Constants (m_neg_inf) 35 | import qualified System.Random.MWC.Distributions as MWC 36 | 37 | import qualified Statistics.Distribution as D 38 | import qualified Statistics.Sample as S 39 | import Statistics.Internal 40 | 41 | 42 | 43 | newtype ExponentialDistribution = ED { 44 | edLambda :: Double 45 | } deriving (Eq, Typeable, Data, Generic) 46 | 47 | instance Show ExponentialDistribution where 48 | showsPrec n (ED l) = defaultShow1 "exponential" l n 49 | instance Read ExponentialDistribution where 50 | readPrec = defaultReadPrecM1 "exponential" exponentialE 51 | 52 | instance ToJSON ExponentialDistribution 53 | instance FromJSON ExponentialDistribution where 54 | parseJSON (Object v) = do 55 | l <- v .: "edLambda" 56 | maybe (fail $ errMsg l) return $ exponentialE l 57 | parseJSON _ = empty 58 | 59 | instance Binary ExponentialDistribution where 60 | put = put . 
edLambda 61 | get = do 62 | l <- get 63 | maybe (fail $ errMsg l) return $ exponentialE l 64 | 65 | instance D.Distribution ExponentialDistribution where 66 | cumulative = cumulative 67 | complCumulative = complCumulative 68 | 69 | instance D.ContDistr ExponentialDistribution where 70 | density (ED l) x 71 | | x < 0 = 0 72 | | otherwise = l * exp (-l * x) 73 | logDensity (ED l) x 74 | | x < 0 = m_neg_inf 75 | | otherwise = log l + (-l * x) 76 | quantile = quantile 77 | complQuantile = complQuantile 78 | 79 | instance D.Mean ExponentialDistribution where 80 | mean (ED l) = 1 / l 81 | 82 | instance D.Variance ExponentialDistribution where 83 | variance (ED l) = 1 / (l * l) 84 | 85 | instance D.MaybeMean ExponentialDistribution where 86 | maybeMean = Just . D.mean 87 | 88 | instance D.MaybeVariance ExponentialDistribution where 89 | maybeStdDev = Just . D.stdDev 90 | maybeVariance = Just . D.variance 91 | 92 | instance D.Entropy ExponentialDistribution where 93 | entropy (ED l) = 1 - log l 94 | 95 | instance D.MaybeEntropy ExponentialDistribution where 96 | maybeEntropy = Just . D.entropy 97 | 98 | instance D.ContGen ExponentialDistribution where 99 | genContVar = MWC.exponential . edLambda 100 | 101 | cumulative :: ExponentialDistribution -> Double -> Double 102 | cumulative (ED l) x | x <= 0 = 0 103 | | otherwise = - expm1 (-l * x) 104 | 105 | complCumulative :: ExponentialDistribution -> Double -> Double 106 | complCumulative (ED l) x | x <= 0 = 1 107 | | otherwise = exp (-l * x) 108 | 109 | 110 | quantile :: ExponentialDistribution -> Double -> Double 111 | quantile (ED l) p 112 | | p >= 0 && p <= 1 = - log1p(-p) / l 113 | | otherwise = 114 | error $ "Statistics.Distribution.Exponential.quantile: p must be in [0,1] range. 
Got: "++show p 115 | 116 | complQuantile :: ExponentialDistribution -> Double -> Double 117 | complQuantile (ED l) p 118 | | p == 0 = 0 119 | | p >= 0 && p < 1 = -log p / l 120 | | otherwise = 121 | error $ "Statistics.Distribution.Exponential.quantile: p must be in [0,1] range. Got: "++show p 122 | 123 | -- | Create an exponential distribution. 124 | exponential :: Double -- ^ Rate parameter. 125 | -> ExponentialDistribution 126 | exponential l = maybe (error $ errMsg l) id $ exponentialE l 127 | 128 | -- | Create an exponential distribution. 129 | exponentialE :: Double -- ^ Rate parameter. 130 | -> Maybe ExponentialDistribution 131 | exponentialE l 132 | | l > 0 = Just (ED l) 133 | | otherwise = Nothing 134 | 135 | errMsg :: Double -> String 136 | errMsg l = "Statistics.Distribution.Exponential.exponential: scale parameter must be positive. Got " ++ show l 137 | 138 | -- | Create exponential distribution from sample. Estimates the rate 139 | -- with the maximum likelihood estimator, which is biased. Returns 140 | -- @Nothing@ if the sample mean does not exist or is not positive. 
141 | instance D.FromSample ExponentialDistribution Double where 142 | fromSample xs = let m = S.mean xs 143 | in if m > 0 then Just (ED (1/m)) else Nothing 144 | -------------------------------------------------------------------------------- /Statistics/Distribution/FDistribution.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | -- | 4 | -- Module : Statistics.Distribution.FDistribution 5 | -- Copyright : (c) 2011 Aleksey Khudyakov 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- Fisher F distribution 13 | module Statistics.Distribution.FDistribution ( 14 | FDistribution 15 | -- * Constructors 16 | , fDistribution 17 | , fDistributionE 18 | , fDistributionReal 19 | , fDistributionRealE 20 | -- * Accessors 21 | , fDistributionNDF1 22 | , fDistributionNDF2 23 | ) where 24 | 25 | import Control.Applicative 26 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 27 | import Data.Binary (Binary(..)) 28 | import Data.Data (Data, Typeable) 29 | import GHC.Generics (Generic) 30 | import Numeric.SpecFunctions ( 31 | logBeta, incompleteBeta, invIncompleteBeta, digamma) 32 | import Numeric.MathFunctions.Constants (m_neg_inf) 33 | 34 | import qualified Statistics.Distribution as D 35 | import Statistics.Function (square) 36 | import Statistics.Internal 37 | 38 | 39 | -- | F distribution 40 | data FDistribution = F { fDistributionNDF1 :: {-# UNPACK #-} !Double 41 | , fDistributionNDF2 :: {-# UNPACK #-} !Double 42 | , _pdfFactor :: {-# UNPACK #-} !Double 43 | } 44 | deriving (Eq, Typeable, Data, Generic) 45 | 46 | instance Show FDistribution where 47 | showsPrec i (F n m _) = defaultShow2 "fDistributionReal" n m i 48 | instance Read FDistribution where 49 | readPrec = defaultReadPrecM2 "fDistributionReal" fDistributionRealE 50 | 51 | instance 
ToJSON FDistribution 52 | instance FromJSON FDistribution where 53 | parseJSON (Object v) = do 54 | n <- v .: "fDistributionNDF1" 55 | m <- v .: "fDistributionNDF2" 56 | maybe (fail $ errMsgR n m) return $ fDistributionRealE n m 57 | parseJSON _ = empty 58 | 59 | instance Binary FDistribution where 60 | put (F n m _) = put n >> put m 61 | get = do 62 | n <- get 63 | m <- get 64 | maybe (fail $ errMsgR n m) return $ fDistributionRealE n m 65 | 66 | fDistribution :: Int -> Int -> FDistribution 67 | fDistribution n m = maybe (error $ errMsg n m) id $ fDistributionE n m 68 | 69 | fDistributionReal :: Double -> Double -> FDistribution 70 | fDistributionReal n m = maybe (error $ errMsgR n m) id $ fDistributionRealE n m 71 | 72 | fDistributionE :: Int -> Int -> Maybe FDistribution 73 | fDistributionE n m 74 | | n > 0 && m > 0 = 75 | let n' = fromIntegral n 76 | m' = fromIntegral m 77 | f' = 0.5 * (log m' * m' + log n' * n') - logBeta (0.5*n') (0.5*m') 78 | in Just $ F n' m' f' 79 | | otherwise = Nothing 80 | 81 | fDistributionRealE :: Double -> Double -> Maybe FDistribution 82 | fDistributionRealE n m 83 | | n > 0 && m > 0 = 84 | let f' = 0.5 * (log m * m + log n * n) - logBeta (0.5*n) (0.5*m) 85 | in Just $ F n m f' 86 | | otherwise = Nothing 87 | 88 | errMsg :: Int -> Int -> String 89 | errMsg _ _ = "Statistics.Distribution.FDistribution.fDistribution: non-positive number of degrees of freedom" 90 | 91 | errMsgR :: Double -> Double -> String 92 | errMsgR _ _ = "Statistics.Distribution.FDistribution.fDistribution: non-positive number of degrees of freedom" 93 | 94 | 95 | 96 | instance D.Distribution FDistribution where 97 | cumulative = cumulative 98 | complCumulative = complCumulative 99 | 100 | instance D.ContDistr FDistribution where 101 | density d x 102 | | x <= 0 = 0 103 | | otherwise = exp $ logDensity d x 104 | logDensity d x 105 | | x <= 0 = m_neg_inf 106 | | otherwise = logDensity d x 107 | quantile = quantile 108 | 109 | cumulative :: FDistribution -> Double 
-> Double 110 | cumulative (F n m _) x 111 | | x <= 0 = 0 112 | -- Only matches +∞ 113 | | isInfinite x = 1 114 | -- NOTE: Here we rely on implementation detail of incompleteBeta. It 115 | -- computes using series expansion for sufficiently small x 116 | -- and uses following identity otherwise: 117 | -- 118 | -- I(x; a, b) = 1 - I(1-x; b, a) 119 | -- 120 | -- Point is we can compute 1-x as m/(m+y) without loss of 121 | -- precision for large x. Sadly this switchover point is 122 | -- implementation detail. 123 | | n >= (n+m)*bx = incompleteBeta (0.5 * n) (0.5 * m) bx 124 | | otherwise = 1 - incompleteBeta (0.5 * m) (0.5 * n) bx1 125 | where 126 | y = n * x 127 | bx = y / (m + y) 128 | bx1 = m / (m + y) 129 | 130 | complCumulative :: FDistribution -> Double -> Double 131 | complCumulative (F n m _) x 132 | | x <= 0 = 1 133 | -- Only matches +∞ 134 | | isInfinite x = 0 135 | -- See NOTE at cumulative 136 | | m >= (n+m)*bx = incompleteBeta (0.5 * m) (0.5 * n) bx 137 | | otherwise = 1 - incompleteBeta (0.5 * n) (0.5 * m) bx1 138 | where 139 | y = n*x 140 | bx = m / (m + y) 141 | bx1 = y / (m + y) 142 | 143 | logDensity :: FDistribution -> Double -> Double 144 | logDensity (F n m fac) x 145 | = fac + log x * (0.5 * n - 1) - log(m + n*x) * 0.5 * (n + m) 146 | 147 | quantile :: FDistribution -> Double -> Double 148 | quantile (F n m _) p 149 | | p >= 0 && p <= 1 = 150 | let x = invIncompleteBeta (0.5 * n) (0.5 * m) p 151 | in m * x / (n * (1 - x)) 152 | | otherwise = 153 | error $ "Statistics.Distribution.Uniform.quantile: p must be in [0,1] range. 
Got: "++show p 154 | 155 | 156 | instance D.MaybeMean FDistribution where 157 | maybeMean (F _ m _) | m > 2 = Just $ m / (m - 2) 158 | | otherwise = Nothing 159 | 160 | instance D.MaybeVariance FDistribution where 161 | maybeStdDev (F n m _) 162 | | m > 4 = Just $ 2 * square m * (m + n - 2) / (n * square (m - 2) * (m - 4)) 163 | | otherwise = Nothing 164 | 165 | instance D.Entropy FDistribution where 166 | entropy (F n m _) = 167 | let nHalf = 0.5 * n 168 | mHalf = 0.5 * m in 169 | log (n/m) 170 | + logBeta nHalf mHalf 171 | + (1 - nHalf) * digamma nHalf 172 | - (1 + mHalf) * digamma mHalf 173 | + (nHalf + mHalf) * digamma (nHalf + mHalf) 174 | 175 | instance D.MaybeEntropy FDistribution where 176 | maybeEntropy = Just . D.entropy 177 | 178 | instance D.ContGen FDistribution where 179 | genContVar = D.genContinuous 180 | -------------------------------------------------------------------------------- /Statistics/Distribution/Hypergeometric.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | -- | 4 | -- Module : Statistics.Distribution.Hypergeometric 5 | -- Copyright : (c) 2009 Bryan O'Sullivan 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- The Hypergeometric distribution. This is the discrete probability 13 | -- distribution that measures the probability of /k/ successes in /l/ 14 | -- trials, without replacement, from a finite population. 15 | -- 16 | -- The parameters of the distribution describe /k/ elements chosen 17 | -- from a population of /l/, with /m/ elements of one type, and 18 | -- /l/-/m/ of the other (all are positive integers). 
19 | 20 | module Statistics.Distribution.Hypergeometric 21 | ( 22 | HypergeometricDistribution 23 | -- * Constructors 24 | , hypergeometric 25 | , hypergeometricE 26 | -- ** Accessors 27 | , hdM 28 | , hdL 29 | , hdK 30 | ) where 31 | 32 | import Control.Applicative 33 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 34 | import Data.Binary (Binary(..)) 35 | import Data.Data (Data, Typeable) 36 | import GHC.Generics (Generic) 37 | import Numeric.MathFunctions.Constants (m_epsilon,m_neg_inf) 38 | import Numeric.SpecFunctions (choose,logChoose) 39 | 40 | import qualified Statistics.Distribution as D 41 | import Statistics.Internal 42 | 43 | 44 | data HypergeometricDistribution = HD { 45 | hdM :: {-# UNPACK #-} !Int 46 | , hdL :: {-# UNPACK #-} !Int 47 | , hdK :: {-# UNPACK #-} !Int 48 | } deriving (Eq, Typeable, Data, Generic) 49 | 50 | instance Show HypergeometricDistribution where 51 | showsPrec i (HD m l k) = defaultShow3 "hypergeometric" m l k i 52 | instance Read HypergeometricDistribution where 53 | readPrec = defaultReadPrecM3 "hypergeometric" hypergeometricE 54 | 55 | instance ToJSON HypergeometricDistribution 56 | instance FromJSON HypergeometricDistribution where 57 | parseJSON (Object v) = do 58 | m <- v .: "hdM" 59 | l <- v .: "hdL" 60 | k <- v .: "hdK" 61 | maybe (fail $ errMsg m l k) return $ hypergeometricE m l k 62 | parseJSON _ = empty 63 | 64 | instance Binary HypergeometricDistribution where 65 | put (HD m l k) = put m >> put l >> put k 66 | get = do 67 | m <- get 68 | l <- get 69 | k <- get 70 | maybe (fail $ errMsg m l k) return $ hypergeometricE m l k 71 | 72 | instance D.Distribution HypergeometricDistribution where 73 | cumulative = cumulative 74 | complCumulative = complCumulative 75 | 76 | instance D.DiscreteDistr HypergeometricDistribution where 77 | probability = probability 78 | logProbability = logProbability 79 | 80 | instance D.Mean HypergeometricDistribution where 81 | mean = mean 82 | 83 | instance D.Variance 
HypergeometricDistribution where 84 | variance = variance 85 | 86 | instance D.MaybeMean HypergeometricDistribution where 87 | maybeMean = Just . D.mean 88 | 89 | instance D.MaybeVariance HypergeometricDistribution where 90 | maybeStdDev = Just . D.stdDev 91 | maybeVariance = Just . D.variance 92 | 93 | instance D.Entropy HypergeometricDistribution where 94 | entropy = directEntropy 95 | 96 | instance D.MaybeEntropy HypergeometricDistribution where 97 | maybeEntropy = Just . D.entropy 98 | 99 | variance :: HypergeometricDistribution -> Double 100 | variance (HD m l k) = (k' * ml) * (1 - ml) * (l' - k') / (l' - 1) 101 | where m' = fromIntegral m 102 | l' = fromIntegral l 103 | k' = fromIntegral k 104 | ml = m' / l' 105 | 106 | mean :: HypergeometricDistribution -> Double 107 | mean (HD m l k) = fromIntegral k * fromIntegral m / fromIntegral l 108 | 109 | directEntropy :: HypergeometricDistribution -> Double 110 | directEntropy d@(HD m _ _) 111 | = negate . sum 112 | $ takeWhile (< negate m_epsilon) 113 | $ dropWhile (not . 
(< negate m_epsilon))
    -- Sum x·log x over [0..m]; the take/drop pair above trims the
    -- zero-probability tails, whose terms evaluate to NaN (0 · log 0).
    [ let x = probability d n in x * log x | n <- [0..m]]


-- | Create hypergeometric distribution.  Calls 'error' on invalid
--   parameters; see 'hypergeometricE' for the accepted ranges.
hypergeometric :: Int               -- ^ /m/
               -> Int               -- ^ /l/
               -> Int               -- ^ /k/
               -> HypergeometricDistribution
hypergeometric m l k
  = maybe (error $ errMsg m l k) id $ hypergeometricE m l k

-- | Create hypergeometric distribution.  Returns 'Nothing' unless
--   l > 0, m ∈ [0,l] and k ∈ (0,l].
hypergeometricE :: Int               -- ^ /m/
                -> Int               -- ^ /l/
                -> Int               -- ^ /k/
                -> Maybe HypergeometricDistribution
hypergeometricE m l k
  | not (l > 0)            = Nothing
  | not (m >= 0 && m <= l) = Nothing
  | not (k > 0 && k <= l)  = Nothing
  | otherwise              = Just (HD m l k)


-- Shared error text for the constructor and the JSON/Binary decoders.
errMsg :: Int -> Int -> Int -> String
errMsg m l k
  =  "Statistics.Distribution.Hypergeometric.hypergeometric:"
  ++ " m=" ++ show m
  ++ " l=" ++ show l
  ++ " k=" ++ show k
  ++ " should hold: l>0 & m in [0,l] & k in (0,l]"

-- Naive implementation
probability :: HypergeometricDistribution -> Int -> Double
probability (HD mi li ki) n
    -- zero outside the support [max 0 (m+k-l), min m k]
  | n < max 0 (mi+ki-li) || n > min mi ki = 0
  -- No overflow
  | li < 1000 = choose mi n * choose (li - mi) (ki - n)
              / choose li ki
    -- for larger populations work with log-binomials to avoid overflow
  | otherwise = exp $ logChoose mi n
                    + logChoose (li - mi) (ki - n)
                    - logChoose li ki

logProbability :: HypergeometricDistribution -> Int -> Double
logProbability (HD mi li ki) n
  | n < max 0 (mi+ki-li) || n > min mi ki = m_neg_inf
  | otherwise = logChoose mi n
              + logChoose (li - mi) (ki - n)
              - logChoose li ki

-- CDF by direct summation of probabilities over the support.
cumulative :: HypergeometricDistribution -> Double -> Double
cumulative d@(HD mi li ki) x
  | isNaN x      = error "Statistics.Distribution.Hypergeometric.cumulative: NaN argument"
  | isInfinite x = if x > 0 then 1 else 0
  | n < minN     = 0
  | n >= maxN    = 1
  | otherwise    = D.sumProbabilities d minN n
  where
    n    = floor x
    minN = max 0 (mi+ki-li)
    maxN = min mi ki

complCumulative ::
HypergeometricDistribution -> Double -> Double 174 | complCumulative d@(HD mi li ki) x 175 | | isNaN x = error "Statistics.Distribution.Hypergeometric.complCumulative: NaN argument" 176 | | isInfinite x = if x > 0 then 0 else 1 177 | | n < minN = 1 178 | | n >= maxN = 0 179 | | otherwise = D.sumProbabilities d (n + 1) maxN 180 | where 181 | n = floor x 182 | minN = max 0 (mi+ki-li) 183 | maxN = min mi ki 184 | -------------------------------------------------------------------------------- /Statistics/Distribution/Laplace.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE MultiParamTypeClasses #-} 2 | {-# LANGUAGE OverloadedStrings #-} 3 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 4 | -- | 5 | -- Module : Statistics.Distribution.Laplace 6 | -- Copyright : (c) 2015 Mihai Maruseac 7 | -- License : BSD3 8 | -- 9 | -- Maintainer : mihai.maruseac@maruseac.com 10 | -- Stability : experimental 11 | -- Portability : portable 12 | -- 13 | -- The Laplace distribution. This is the continuous probability 14 | -- defined as the difference of two iid exponential random variables 15 | -- or a Brownian motion evaluated as exponentially distributed times. 
16 | -- It is used in differential privacy (Laplace Method), speech 17 | -- recognition and least absolute deviations method (Laplace's first 18 | -- law of errors, giving a robust regression method) 19 | -- 20 | module Statistics.Distribution.Laplace 21 | ( 22 | LaplaceDistribution 23 | -- * Constructors 24 | , laplace 25 | , laplaceE 26 | -- * Accessors 27 | , ldLocation 28 | , ldScale 29 | ) where 30 | 31 | import Control.Applicative 32 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 33 | import Data.Binary (Binary(..)) 34 | import Data.Data (Data, Typeable) 35 | import GHC.Generics (Generic) 36 | import qualified Data.Vector.Generic as G 37 | import qualified Statistics.Distribution as D 38 | import qualified Statistics.Quantile as Q 39 | import qualified Statistics.Sample as S 40 | import Statistics.Internal 41 | 42 | 43 | data LaplaceDistribution = LD { 44 | ldLocation :: {-# UNPACK #-} !Double 45 | -- ^ Location. 46 | , ldScale :: {-# UNPACK #-} !Double 47 | -- ^ Scale. 
48 | } deriving (Eq, Typeable, Data, Generic) 49 | 50 | instance Show LaplaceDistribution where 51 | showsPrec i (LD l s) = defaultShow2 "laplace" l s i 52 | instance Read LaplaceDistribution where 53 | readPrec = defaultReadPrecM2 "laplace" laplaceE 54 | 55 | instance ToJSON LaplaceDistribution 56 | instance FromJSON LaplaceDistribution where 57 | parseJSON (Object v) = do 58 | l <- v .: "ldLocation" 59 | s <- v .: "ldScale" 60 | maybe (fail $ errMsg l s) return $ laplaceE l s 61 | parseJSON _ = empty 62 | 63 | instance Binary LaplaceDistribution where 64 | put (LD l s) = put l >> put s 65 | get = do 66 | l <- get 67 | s <- get 68 | maybe (fail $ errMsg l s) return $ laplaceE l s 69 | 70 | instance D.Distribution LaplaceDistribution where 71 | cumulative = cumulative 72 | complCumulative = complCumulative 73 | 74 | instance D.ContDistr LaplaceDistribution where 75 | density (LD l s) x = exp (- abs (x - l) / s) / (2 * s) 76 | logDensity (LD l s) x = - abs (x - l) / s - log 2 - log s 77 | quantile = quantile 78 | complQuantile = complQuantile 79 | 80 | instance D.Mean LaplaceDistribution where 81 | mean (LD l _) = l 82 | 83 | instance D.Variance LaplaceDistribution where 84 | variance (LD _ s) = 2 * s * s 85 | 86 | instance D.MaybeMean LaplaceDistribution where 87 | maybeMean = Just . D.mean 88 | 89 | instance D.MaybeVariance LaplaceDistribution where 90 | maybeStdDev = Just . D.stdDev 91 | maybeVariance = Just . D.variance 92 | 93 | instance D.Entropy LaplaceDistribution where 94 | entropy (LD _ s) = 1 + log (2 * s) 95 | 96 | instance D.MaybeEntropy LaplaceDistribution where 97 | maybeEntropy = Just . 
D.entropy

instance D.ContGen LaplaceDistribution where
  genContVar = D.genContinuous

-- | CDF: ½·exp((x−l)/s) at or below the location, mirrored above it.
cumulative :: LaplaceDistribution -> Double -> Double
cumulative (LD l s) x
  | x <= l    = 0.5 * exp ( (x - l) / s)
  | otherwise = 1 - 0.5 * exp ( - (x - l) / s )

-- | Complementary CDF, computed directly rather than as @1 - cumulative@.
complCumulative :: LaplaceDistribution -> Double -> Double
complCumulative (LD l s) x
  | x <= l    = 1 - 0.5 * exp ( (x - l) / s)
  | otherwise = 0.5 * exp ( - (x - l) / s )

-- | Inverse CDF.  Defined on all of [0,1]; the endpoints map to ∓∞.
quantile :: LaplaceDistribution -> Double -> Double
quantile (LD l s) p
  | p == 0           = -inf
  | p == 1           = inf
  | p == 0.5         = l
  | p > 0 && p < 0.5 = l + s * log (2 * p)
  | p > 0.5 && p < 1 = l - s * log (2 - 2 * p)
  | otherwise =
    error $ "Statistics.Distribution.Laplace.quantile: p must be in [0,1] range. Got: "++show p
  where
    inf = 1 / 0

-- | Inverse complementary CDF.
complQuantile :: LaplaceDistribution -> Double -> Double
complQuantile (LD l s) p
  | p == 0           = inf
  | p == 1           = -inf
  | p == 0.5         = l
  | p > 0 && p < 0.5 = l - s * log (2 * p)
  | p > 0.5 && p < 1 = l + s * log (2 - 2 * p)
  | otherwise =
    -- FIX: this message previously blamed 'quantile'; name the right function.
    error $ "Statistics.Distribution.Laplace.complQuantile: p must be in [0,1] range. Got: "++show p
  where
    inf = 1 / 0

-- | Create a Laplace distribution.  Calls 'error' on an invalid scale.
laplace :: Double         -- ^ Location
        -> Double         -- ^ Scale
        -> LaplaceDistribution
laplace l s = maybe (error $ errMsg l s) id $ laplaceE l s

-- | Create a Laplace distribution.  Returns 'Nothing' when the scale is
--   negative or NaN; a zero scale is accepted (degenerate distribution).
laplaceE :: Double         -- ^ Location
         -> Double         -- ^ Scale
         -> Maybe LaplaceDistribution
laplaceE l s
  | s >= 0    = Just (LD l s)
  | otherwise = Nothing

-- FIX: wording now matches the @s >= 0@ check in 'laplaceE' ("positive"
-- previously contradicted the accepted zero scale).
errMsg :: Double -> Double -> String
errMsg _ s = "Statistics.Distribution.Laplace.laplace: scale parameter must be non-negative. Got " ++ show s


-- | Create Laplace distribution from sample.
The location is estimated 155 | -- as the median of the sample, and the scale as the mean absolute 156 | -- deviation of the median. 157 | instance D.FromSample LaplaceDistribution Double where 158 | fromSample xs 159 | | G.null xs = Nothing 160 | | otherwise = Just $! LD s l 161 | where 162 | s = Q.median Q.medianUnbiased xs 163 | l = S.mean $ G.map (\x -> abs $ x - s) xs 164 | -------------------------------------------------------------------------------- /Statistics/Distribution/Lognormal.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE MultiParamTypeClasses #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | -- | 4 | -- Module : Statistics.Distribution.Lognormal 5 | -- Copyright : (c) 2020 Ximin Luo 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : infinity0@pwned.gg 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- The log normal distribution. This is a continuous probability 13 | -- distribution that describes data whose log is clustered around a 14 | -- mean. For example, the multiplicative product of many independent 15 | -- positive random variables. 16 | 17 | module Statistics.Distribution.Lognormal 18 | ( 19 | LognormalDistribution 20 | -- * Constructors 21 | , lognormalDistr 22 | , lognormalDistrErr 23 | , lognormalDistrMeanStddevErr 24 | , lognormalStandard 25 | ) where 26 | 27 | import Data.Aeson (FromJSON, ToJSON) 28 | import Data.Binary (Binary (..)) 29 | import Data.Data (Data, Typeable) 30 | import GHC.Generics (Generic) 31 | import Numeric.MathFunctions.Constants (m_huge, m_sqrt_2_pi) 32 | import Numeric.SpecFunctions (expm1, log1p) 33 | import qualified Data.Vector.Generic as G 34 | 35 | import qualified Statistics.Distribution as D 36 | import qualified Statistics.Distribution.Normal as N 37 | import Statistics.Internal 38 | 39 | 40 | -- | The lognormal distribution. 
41 | newtype LognormalDistribution = LND N.NormalDistribution 42 | deriving (Eq, Typeable, Data, Generic) 43 | 44 | instance Show LognormalDistribution where 45 | showsPrec i (LND d) = defaultShow2 "lognormalDistr" m s i 46 | where 47 | m = D.mean d 48 | s = D.stdDev d 49 | instance Read LognormalDistribution where 50 | readPrec = defaultReadPrecM2 "lognormalDistr" $ 51 | (either (const Nothing) Just .) . lognormalDistrErr 52 | 53 | instance ToJSON LognormalDistribution 54 | instance FromJSON LognormalDistribution 55 | 56 | instance Binary LognormalDistribution where 57 | put (LND d) = put m >> put s 58 | where 59 | m = D.mean d 60 | s = D.stdDev d 61 | get = do 62 | m <- get 63 | sd <- get 64 | either fail return $ lognormalDistrErr m sd 65 | 66 | instance D.Distribution LognormalDistribution where 67 | cumulative = cumulative 68 | complCumulative = complCumulative 69 | 70 | instance D.ContDistr LognormalDistribution where 71 | logDensity = logDensity 72 | quantile = quantile 73 | complQuantile = complQuantile 74 | 75 | instance D.MaybeMean LognormalDistribution where 76 | maybeMean = Just . D.mean 77 | 78 | instance D.Mean LognormalDistribution where 79 | mean (LND d) = exp (m + v / 2) 80 | where 81 | m = D.mean d 82 | v = D.variance d 83 | 84 | instance D.MaybeVariance LognormalDistribution where 85 | maybeStdDev = Just . D.stdDev 86 | maybeVariance = Just . D.variance 87 | 88 | instance D.Variance LognormalDistribution where 89 | variance (LND d) = expm1 v * exp (2 * m + v) 90 | where 91 | m = D.mean d 92 | v = D.variance d 93 | 94 | instance D.Entropy LognormalDistribution where 95 | entropy (LND d) = logBase 2 (s * exp (m + 0.5) * m_sqrt_2_pi) 96 | where 97 | m = D.mean d 98 | s = D.stdDev d 99 | 100 | instance D.MaybeEntropy LognormalDistribution where 101 | maybeEntropy = Just . D.entropy 102 | 103 | instance D.ContGen LognormalDistribution where 104 | genContVar d = D.genContinuous d 105 | 106 | -- | Standard log normal distribution with mu 0 and sigma 1. 
107 | -- 108 | -- Mean is @sqrt e@ and variance is @(e - 1) * e@. 109 | lognormalStandard :: LognormalDistribution 110 | lognormalStandard = LND N.standard 111 | 112 | -- | Create log normal distribution from parameters. 113 | lognormalDistr 114 | :: Double -- ^ Mu 115 | -> Double -- ^ Sigma 116 | -> LognormalDistribution 117 | lognormalDistr mu sig = either error id $ lognormalDistrErr mu sig 118 | 119 | -- | Create log normal distribution from parameters. 120 | lognormalDistrErr 121 | :: Double -- ^ Mu 122 | -> Double -- ^ Sigma 123 | -> Either String LognormalDistribution 124 | lognormalDistrErr mu sig 125 | | sig >= sqrt (log m_huge - 2 * mu) = Left $ errMsg mu sig 126 | | otherwise = LND <$> N.normalDistrErr mu sig 127 | 128 | errMsg :: Double -> Double -> String 129 | errMsg mu sig = 130 | "Statistics.Distribution.Lognormal.lognormalDistr: sigma must be > 0 && < " 131 | ++ show lim ++ ". Got " ++ show sig 132 | where lim = sqrt (log m_huge - 2 * mu) 133 | 134 | -- | Create log normal distribution from mean and standard deviation. 135 | lognormalDistrMeanStddevErr 136 | :: Double -- ^ Mu 137 | -> Double -- ^ Sigma 138 | -> Either String LognormalDistribution 139 | lognormalDistrMeanStddevErr m sd = LND <$> N.normalDistrErr mu sig 140 | where r = sd / m 141 | sig2 = log1p (r * r) 142 | sig = sqrt sig2 143 | mu = log m - sig2 / 2 144 | 145 | -- | Variance is estimated using maximum likelihood method 146 | -- (biased estimation) over the log of the data. 147 | -- 148 | -- Returns @Nothing@ if sample contains less than one element or 149 | -- variance is zero (all elements are equal) 150 | instance D.FromSample LognormalDistribution Double where 151 | fromSample = fmap LND . D.fromSample . 
G.map log

-- | Log-density of the lognormal distribution.
--
--   FIX: for x ≤ 0 the distribution has no mass, so the log-density is
--   −∞; the previous code returned 0, which corresponds to density 1
--   (and made the derived @density@ report 1 outside the support).
logDensity :: LognormalDistribution -> Double -> Double
logDensity (LND d) x
  | x > 0     = let lx = log x in D.logDensity d lx - lx
  | otherwise = -1 / 0

-- | CDF: defer to the underlying normal distribution of @log x@.
cumulative :: LognormalDistribution -> Double -> Double
cumulative (LND d) x
  | x > 0     = D.cumulative d $ log x
  | otherwise = 0

complCumulative :: LognormalDistribution -> Double -> Double
complCumulative (LND d) x
  | x > 0     = D.complCumulative d $ log x
  | otherwise = 1

-- | Quantile: exponentiate the underlying normal quantile.
quantile :: LognormalDistribution -> Double -> Double
quantile (LND d) = exp . D.quantile d

complQuantile :: LognormalDistribution -> Double -> Double
complQuantile (LND d) = exp . D.complQuantile d
--------------------------------------------------------------------------------
/Statistics/Distribution/Poisson.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-}
-- |
-- Module    : Statistics.Distribution.Poisson
-- Copyright : (c) 2009, 2011 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- The Poisson distribution.  This is the discrete probability
-- distribution of a number of events occurring in a fixed interval if
-- these events occur with a known average rate, and occur
-- independently from each other within that interval.
16 | 17 | module Statistics.Distribution.Poisson 18 | ( 19 | PoissonDistribution 20 | -- * Constructors 21 | , poisson 22 | , poissonE 23 | -- * Accessors 24 | , poissonLambda 25 | -- * References 26 | -- $references 27 | ) where 28 | 29 | import Control.Applicative 30 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 31 | import Data.Binary (Binary(..)) 32 | import Data.Data (Data, Typeable) 33 | import GHC.Generics (Generic) 34 | import Numeric.SpecFunctions (incompleteGamma,logFactorial) 35 | import Numeric.MathFunctions.Constants (m_neg_inf) 36 | 37 | import qualified Statistics.Distribution as D 38 | import qualified Statistics.Distribution.Poisson.Internal as I 39 | import Statistics.Internal 40 | 41 | 42 | 43 | newtype PoissonDistribution = PD { 44 | poissonLambda :: Double 45 | } deriving (Eq, Typeable, Data, Generic) 46 | 47 | instance Show PoissonDistribution where 48 | showsPrec i (PD l) = defaultShow1 "poisson" l i 49 | instance Read PoissonDistribution where 50 | readPrec = defaultReadPrecM1 "poisson" poissonE 51 | 52 | instance ToJSON PoissonDistribution 53 | instance FromJSON PoissonDistribution where 54 | parseJSON (Object v) = do 55 | l <- v .: "poissonLambda" 56 | maybe (fail $ errMsg l) return $ poissonE l 57 | parseJSON _ = empty 58 | 59 | instance Binary PoissonDistribution where 60 | put = put . 
poissonLambda
  get = do
    l <- get
    maybe (fail $ errMsg l) return $ poissonE l

instance D.Distribution PoissonDistribution where
  cumulative (PD lambda) x
    | x < 0        = 0
    | isInfinite x = 1
    | isNaN x      = error "Statistics.Distribution.Poisson.cumulative: NaN input"
    -- P(X ≤ ⌊x⌋) via the regularized incomplete gamma function.
    | otherwise    = 1 - incompleteGamma (fromIntegral (floor x + 1 :: Int)) lambda

instance D.DiscreteDistr PoissonDistribution where
  probability (PD lambda) x = I.probability lambda (fromIntegral x)
  -- log P(X = i) = i·log λ − log i! − λ; −∞ outside the support.
  logProbability (PD lambda) i
    | i < 0     = m_neg_inf
    | otherwise = log lambda * fromIntegral i - logFactorial i - lambda

-- Mean and variance of a Poisson distribution both equal λ.
instance D.Variance PoissonDistribution where
  variance = poissonLambda

instance D.Mean PoissonDistribution where
  mean = poissonLambda

instance D.MaybeMean PoissonDistribution where
  maybeMean = Just . D.mean

instance D.MaybeVariance PoissonDistribution where
  maybeStdDev = Just . D.stdDev

instance D.Entropy PoissonDistribution where
  entropy (PD lambda) = I.poissonEntropy lambda

instance D.MaybeEntropy PoissonDistribution where
  maybeEntropy = Just . D.entropy

-- | Create Poisson distribution.  Calls 'error' for a negative lambda.
poisson :: Double -> PoissonDistribution
poisson l = maybe (error $ errMsg l) id $ poissonE l

-- | Create Poisson distribution.  Returns 'Nothing' for a negative lambda.
poissonE :: Double -> Maybe PoissonDistribution
poissonE l
  | l >= 0    = Just (PD l)
  | otherwise = Nothing

-- Shared error text for the constructor and the JSON/Binary decoders.
errMsg :: Double -> String
errMsg l = "Statistics.Distribution.Poisson.poisson: lambda must be non-negative. Got "
        ++ show l


-- $references
--
-- * Loader, C. (2000) Fast and Accurate Computation of Binomial
--   Probabilities.
-- * Adell, J., Lekuona, A., and Yu, Y.
(2010) Sharp Bounds on the 116 | -- Entropy of the Poisson Law and Related Quantities 117 | -- 118 | -------------------------------------------------------------------------------- /Statistics/Distribution/StudentT.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | -- | 4 | -- Module : Statistics.Distribution.StudentT 5 | -- Copyright : (c) 2011 Aleksey Khudyakov 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- Student-T distribution 13 | module Statistics.Distribution.StudentT ( 14 | StudentT 15 | -- * Constructors 16 | , studentT 17 | , studentTE 18 | , studentTUnstandardized 19 | -- * Accessors 20 | , studentTndf 21 | ) where 22 | 23 | import Control.Applicative 24 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 25 | import Data.Binary (Binary(..)) 26 | import Data.Data (Data, Typeable) 27 | import GHC.Generics (Generic) 28 | import Numeric.SpecFunctions ( 29 | logBeta, incompleteBeta, invIncompleteBeta, digamma, log1p) 30 | 31 | import qualified Statistics.Distribution as D 32 | import Statistics.Distribution.Transform (LinearTransform (..)) 33 | import Statistics.Internal 34 | 35 | 36 | -- | Student-T distribution 37 | newtype StudentT = StudentT { studentTndf :: Double } 38 | deriving (Eq, Typeable, Data, Generic) 39 | 40 | instance Show StudentT where 41 | showsPrec i (StudentT ndf) = defaultShow1 "studentT" ndf i 42 | instance Read StudentT where 43 | readPrec = defaultReadPrecM1 "studentT" studentTE 44 | 45 | instance ToJSON StudentT 46 | instance FromJSON StudentT where 47 | parseJSON (Object v) = do 48 | ndf <- v .: "studentTndf" 49 | maybe (fail $ errMsg ndf) return $ studentTE ndf 50 | parseJSON _ = empty 51 | 52 | instance Binary StudentT where 53 | put = put . 
studentTndf 54 | get = do 55 | ndf <- get 56 | maybe (fail $ errMsg ndf) return $ studentTE ndf 57 | 58 | -- | Create Student-T distribution. Number of parameters must be positive. 59 | studentT :: Double -> StudentT 60 | studentT ndf = maybe (error $ errMsg ndf) id $ studentTE ndf 61 | 62 | -- | Create Student-T distribution. Number of parameters must be positive. 63 | studentTE :: Double -> Maybe StudentT 64 | studentTE ndf 65 | | ndf > 0 = Just (StudentT ndf) 66 | | otherwise = Nothing 67 | 68 | errMsg :: Double -> String 69 | errMsg _ = modErr "studentT" "non-positive number of degrees of freedom" 70 | 71 | 72 | instance D.Distribution StudentT where 73 | cumulative = cumulative 74 | complCumulative = complCumulative 75 | 76 | instance D.ContDistr StudentT where 77 | density d@(StudentT ndf) x = exp (logDensityUnscaled d x) / sqrt ndf 78 | logDensity d@(StudentT ndf) x = logDensityUnscaled d x - log (sqrt ndf) 79 | quantile = quantile 80 | 81 | cumulative :: StudentT -> Double -> Double 82 | cumulative (StudentT ndf) x 83 | | x > 0 = 1 - 0.5 * ibeta 84 | | otherwise = 0.5 * ibeta 85 | where 86 | ibeta = incompleteBeta (0.5 * ndf) 0.5 (ndf / (ndf + x*x)) 87 | 88 | complCumulative :: StudentT -> Double -> Double 89 | complCumulative (StudentT ndf) x 90 | | x > 0 = 0.5 * ibeta 91 | | otherwise = 1 - 0.5 * ibeta 92 | where 93 | ibeta = incompleteBeta (0.5 * ndf) 0.5 (ndf / (ndf + x*x)) 94 | 95 | 96 | logDensityUnscaled :: StudentT -> Double -> Double 97 | logDensityUnscaled (StudentT ndf) x 98 | = log1p (x*x/ndf) * (-(0.5 * (1 + ndf))) 99 | - logBeta 0.5 (0.5 * ndf) 100 | 101 | quantile :: StudentT -> Double -> Double 102 | quantile (StudentT ndf) p 103 | | p >= 0 && p <= 1 = 104 | let x = invIncompleteBeta (0.5 * ndf) 0.5 (2 * min p (1 - p)) 105 | in case sqrt $ ndf * (1 - x) / x of 106 | r | p < 0.5 -> -r 107 | | otherwise -> r 108 | | otherwise = modErr "quantile" $ "p must be in [0,1] range. 
Got: "++show p 109 | 110 | 111 | instance D.MaybeMean StudentT where 112 | maybeMean (StudentT ndf) | ndf > 1 = Just 0 113 | | otherwise = Nothing 114 | 115 | instance D.MaybeVariance StudentT where 116 | maybeVariance (StudentT ndf) | ndf > 2 = Just $! ndf / (ndf - 2) 117 | | otherwise = Nothing 118 | 119 | instance D.Entropy StudentT where 120 | entropy (StudentT ndf) = 121 | 0.5 * (ndf+1) * (digamma ((1+ndf)/2) - digamma(ndf/2)) 122 | + log (sqrt ndf) 123 | + logBeta (ndf/2) 0.5 124 | 125 | instance D.MaybeEntropy StudentT where 126 | maybeEntropy = Just . D.entropy 127 | 128 | instance D.ContGen StudentT where 129 | genContVar = D.genContinuous 130 | 131 | -- | Create an unstandardized Student-t distribution. 132 | studentTUnstandardized :: Double -- ^ Number of degrees of freedom 133 | -> Double -- ^ Central value (0 for standard Student T distribution) 134 | -> Double -- ^ Scale parameter 135 | -> LinearTransform StudentT 136 | studentTUnstandardized ndf mu sigma 137 | | sigma > 0 = LinearTransform mu sigma $ studentT ndf 138 | | otherwise = modErr "studentTUnstandardized" $ "sigma must be > 0. Got: " ++ show sigma 139 | 140 | modErr :: String -> String -> a 141 | modErr fun msg = error $ "Statistics.Distribution.StudentT." ++ fun ++ ": " ++ msg 142 | -------------------------------------------------------------------------------- /Statistics/Distribution/Transform.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric, FlexibleContexts, 2 | FlexibleInstances, UndecidableInstances #-} 3 | -- | 4 | -- Module : Statistics.Distribution.Transform 5 | -- Copyright : (c) 2013 John McDonnell; 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- Transformations over distributions 13 | module Statistics.Distribution.Transform 14 | ( 15 | LinearTransform (..) 
16 | , linTransFixedPoint 17 | , scaleAround 18 | ) where 19 | 20 | import Data.Aeson (FromJSON, ToJSON) 21 | import Data.Binary (Binary) 22 | import Data.Binary (put, get) 23 | import Data.Data (Data, Typeable) 24 | import GHC.Generics (Generic) 25 | import qualified Statistics.Distribution as D 26 | 27 | -- | Linear transformation applied to distribution. 28 | -- 29 | -- > LinearTransform μ σ _ 30 | -- > x' = μ + σ·x 31 | data LinearTransform d = LinearTransform 32 | { linTransLocation :: {-# UNPACK #-} !Double 33 | -- ^ Location parameter. 34 | , linTransScale :: {-# UNPACK #-} !Double 35 | -- ^ Scale parameter. 36 | , linTransDistr :: d 37 | -- ^ Distribution being transformed. 38 | } deriving (Eq, Show, Read, Typeable, Data, Generic) 39 | 40 | instance (FromJSON d) => FromJSON (LinearTransform d) 41 | instance (ToJSON d) => ToJSON (LinearTransform d) 42 | 43 | instance (Binary d) => Binary (LinearTransform d) where 44 | get = LinearTransform <$> get <*> get <*> get 45 | put (LinearTransform x y z) = put x >> put y >> put z 46 | 47 | -- | Apply linear transformation to distribution. 
scaleAround :: Double            -- ^ Fixed point
            -> Double            -- ^ Scale parameter
            -> d                 -- ^ Distribution
            -> LinearTransform d
-- The location μ = x0·(1 − σ) makes x0 a fixed point: x0 ↦ μ + σ·x0 = x0.
scaleAround x0 sc = LinearTransform (x0 * (1 - sc)) sc

-- | Get fixed point of linear transformation
linTransFixedPoint :: LinearTransform d -> Double
linTransFixedPoint (LinearTransform loc sc _) = loc / (1 - sc)

instance Functor LinearTransform where
  fmap f (LinearTransform loc sc dist) = LinearTransform loc sc (f dist)

instance D.Distribution d => D.Distribution (LinearTransform d) where
  -- Pull x back through the transform: x = μ + σ·x'  ⇒  x' = (x − μ)/σ.
  cumulative (LinearTransform loc sc dist) x = D.cumulative dist $ (x-loc) / sc

instance D.ContDistr d => D.ContDistr (LinearTransform d) where
  -- Change of variables divides the density by the Jacobian σ …
  density (LinearTransform loc sc dist) x = D.density dist ((x-loc) / sc) / sc
  -- … which in log space becomes a subtraction of log σ.
  logDensity (LinearTransform loc sc dist) x = D.logDensity dist ((x-loc) / sc) - log sc
  quantile (LinearTransform loc sc dist) p = loc + sc * D.quantile dist p
  complQuantile (LinearTransform loc sc dist) p = loc + sc * D.complQuantile dist p

instance D.MaybeMean d => D.MaybeMean (LinearTransform d) where
  maybeMean (LinearTransform loc _ dist) = (+loc) <$> D.maybeMean dist

instance (D.Mean d) => D.Mean (LinearTransform d) where
  mean (LinearTransform loc _ dist) = loc + D.mean dist

instance D.MaybeVariance d => D.MaybeVariance (LinearTransform d) where
  -- Variance scales with σ²; the standard deviation with σ.
  maybeVariance (LinearTransform _ sc dist) = (*(sc*sc)) <$> D.maybeVariance dist
  maybeStdDev (LinearTransform _ sc dist) = (*sc) <$> D.maybeStdDev dist

instance (D.Variance d) => D.Variance (LinearTransform d) where
  variance (LinearTransform _ sc dist) = sc * sc * D.variance dist
  stdDev (LinearTransform _ sc dist) = sc * D.stdDev dist

instance (D.MaybeEntropy d) => D.MaybeEntropy (LinearTransform d) where
  -- NOTE(review): entropy is passed through unchanged here; for a
  -- continuous distribution the differential entropy of a scaled
  -- variable shifts by log σ — confirm this pass-through is intended.
  maybeEntropy (LinearTransform _ _ dist) = D.maybeEntropy dist

instance (D.Entropy d) => D.Entropy
(LinearTransform d) where 88 | entropy (LinearTransform _ _ dist) = D.entropy dist 89 | 90 | instance D.ContGen d => D.ContGen (LinearTransform d) where 91 | genContVar (LinearTransform loc sc d) g = do 92 | x <- D.genContVar d g 93 | return $! loc + sc * x 94 | -------------------------------------------------------------------------------- /Statistics/Distribution/Uniform.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | {-# LANGUAGE DeriveDataTypeable, DeriveGeneric #-} 3 | -- | 4 | -- Module : Statistics.Distribution.Uniform 5 | -- Copyright : (c) 2011 Aleksey Khudyakov 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- Variate distributed uniformly in the interval. 13 | module Statistics.Distribution.Uniform 14 | ( 15 | UniformDistribution 16 | -- * Constructors 17 | , uniformDistr 18 | , uniformDistrE 19 | -- ** Accessors 20 | , uniformA 21 | , uniformB 22 | ) where 23 | 24 | import Control.Applicative 25 | import Data.Aeson (FromJSON(..), ToJSON, Value(..), (.:)) 26 | import Data.Binary (Binary(..)) 27 | import Data.Data (Data, Typeable) 28 | import System.Random.Stateful (uniformRM) 29 | import GHC.Generics (Generic) 30 | 31 | import qualified Statistics.Distribution as D 32 | import Statistics.Internal 33 | 34 | 35 | 36 | -- | Uniform distribution from A to B 37 | data UniformDistribution = UniformDistribution { 38 | uniformA :: {-# UNPACK #-} !Double -- ^ Low boundary of distribution 39 | , uniformB :: {-# UNPACK #-} !Double -- ^ Upper boundary of distribution 40 | } deriving (Eq, Typeable, Data, Generic) 41 | 42 | instance Show UniformDistribution where 43 | showsPrec i (UniformDistribution a b) = defaultShow2 "uniformDistr" a b i 44 | instance Read UniformDistribution where 45 | readPrec = defaultReadPrecM2 "uniformDistr" uniformDistrE 46 | 47 | instance ToJSON UniformDistribution 
48 | instance FromJSON UniformDistribution where 49 | parseJSON (Object v) = do 50 | a <- v .: "uniformA" 51 | b <- v .: "uniformB" 52 | maybe (fail errMsg) return $ uniformDistrE a b 53 | parseJSON _ = empty 54 | 55 | instance Binary UniformDistribution where 56 | put (UniformDistribution x y) = put x >> put y 57 | get = do 58 | a <- get 59 | b <- get 60 | maybe (fail errMsg) return $ uniformDistrE a b 61 | 62 | -- | Create uniform distribution. 63 | uniformDistr :: Double -> Double -> UniformDistribution 64 | uniformDistr a b = maybe (error errMsg) id $ uniformDistrE a b 65 | 66 | -- | Create uniform distribution. 67 | uniformDistrE :: Double -> Double -> Maybe UniformDistribution 68 | uniformDistrE a b 69 | | b < a = Just $ UniformDistribution b a 70 | | a < b = Just $ UniformDistribution a b 71 | | otherwise = Nothing 72 | -- NOTE: failure is in default branch to guard against NaNs. 73 | 74 | errMsg :: String 75 | errMsg = "Statistics.Distribution.Uniform.uniform: wrong parameters" 76 | 77 | 78 | instance D.Distribution UniformDistribution where 79 | cumulative (UniformDistribution a b) x 80 | | x < a = 0 81 | | x > b = 1 82 | | otherwise = (x - a) / (b - a) 83 | 84 | instance D.ContDistr UniformDistribution where 85 | density (UniformDistribution a b) x 86 | | x < a = 0 87 | | x > b = 0 88 | | otherwise = 1 / (b - a) 89 | quantile (UniformDistribution a b) p 90 | | p >= 0 && p <= 1 = a + (b - a) * p 91 | | otherwise = 92 | error $ "Statistics.Distribution.Uniform.quantile: p must be in [0,1] range. Got: "++show p 93 | complQuantile (UniformDistribution a b) p 94 | | p >= 0 && p <= 1 = b + (a - b) * p 95 | | otherwise = 96 | error $ "Statistics.Distribution.Uniform.complQuantile: p must be in [0,1] range. 
Got: "++show p 97 | 98 | instance D.Mean UniformDistribution where 99 | mean (UniformDistribution a b) = 0.5 * (a + b) 100 | 101 | instance D.Variance UniformDistribution where 102 | -- NOTE: 1/sqrt 12 is not constant folded (#4101) so it's written as 103 | -- numerical constant. (Also FIXME!) 104 | stdDev (UniformDistribution a b) = 0.2886751345948129 * (b - a) 105 | variance (UniformDistribution a b) = d * d / 12 where d = b - a 106 | 107 | instance D.MaybeMean UniformDistribution where 108 | maybeMean = Just . D.mean 109 | 110 | instance D.MaybeVariance UniformDistribution where 111 | maybeStdDev = Just . D.stdDev 112 | 113 | instance D.Entropy UniformDistribution where 114 | entropy (UniformDistribution a b) = log (b - a) 115 | 116 | instance D.MaybeEntropy UniformDistribution where 117 | maybeEntropy = Just . D.entropy 118 | 119 | instance D.ContGen UniformDistribution where 120 | genContVar (UniformDistribution a b) = uniformRM (a,b) 121 | -------------------------------------------------------------------------------- /Statistics/Function.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns, CPP, FlexibleContexts, Rank2Types #-} 2 | {-# OPTIONS_GHC -fsimpl-tick-factor=200 #-} 3 | -- | 4 | -- Module : Statistics.Function 5 | -- Copyright : (c) 2009, 2010, 2011 Bryan O'Sullivan 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- Useful functions. 

module Statistics.Function
    (
    -- * Scanning
      minMax
    -- * Sorting
    , sort
    , gsort
    , sortBy
    , partialSort
    -- * Indexing
    , indexed
    , indices
    -- * Bit twiddling
    , nextHighestPowerOfTwo
    -- * Comparison
    , within
    -- * Arithmetic
    , square
    -- * Vectors
    , unsafeModify
    -- * Combinators
    , for
    , rfor
    ) where

#include "MachDeps.h"

import Control.Monad.ST (ST)
import Data.Bits ((.|.), shiftR)
import qualified Data.Vector.Algorithms.Intro as I
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Unboxed.Mutable as M
import Numeric.MathFunctions.Comparison (within)

-- | Sort a vector (monomorphic version for unboxed 'Double' vectors).
-- NOTE(review): marked NOINLINE, presumably to share a single compiled
-- copy — confirm before changing.
sort :: U.Vector Double -> U.Vector Double
sort = G.modify I.sort
{-# NOINLINE sort #-}

-- | Sort a vector.  Generic over vector and element type; delegates to
-- 'Data.Vector.Algorithms.Intro.sort'.
gsort :: (Ord e, G.Vector v e) => v e -> v e
gsort = G.modify I.sort
{-# INLINE gsort #-}

-- | Sort a vector using a custom ordering.
sortBy :: (G.Vector v e) => I.Comparison e -> v e -> v e
sortBy f = G.modify $ I.sortBy f
{-# INLINE sortBy #-}

-- | Partially sort a vector, such that the least /k/ elements will be
-- at the front.  Delegates to 'Data.Vector.Algorithms.Intro.partialSort'.
partialSort :: (G.Vector v e, Ord e) =>
               Int   -- ^ The number /k/ of least elements.
            -> v e
            -> v e
partialSort k = G.modify (`I.partialSort` k)
{-# SPECIALIZE partialSort :: Int -> U.Vector Double -> U.Vector Double #-}

-- | Return the indices of a vector.
indices :: (G.Vector v a, G.Vector v Int) => v a -> v Int
indices a = G.enumFromTo 0 (G.length a - 1)
{-# INLINE indices #-}

-- | Zip a vector with its indices.
indexed :: (G.Vector v e, G.Vector v (Int,e)) => v e -> v (Int,e)
indexed xs = G.imap (,) xs
{-# INLINE indexed #-}

-- Strict pair holding a running (minimum, maximum); unpacked so the
-- fold below stays in unboxed doubles.
data MM = MM {-# UNPACK #-} !Double {-# UNPACK #-} !Double

-- | Compute the minimum and maximum of a vector in one pass.
-- The accumulator starts at (+Infinity, -Infinity), so an empty input
-- yields @(Infinity, -Infinity)@.
minMax :: (G.Vector v Double) => v Double -> (Double, Double)
minMax = fini . G.foldl' go (MM (1/0) (-1/0))
  where
    go (MM lo hi) k = MM (min lo k) (max hi k)
    fini (MM lo hi) = (lo, hi)
{-# INLINE minMax #-}

-- | Efficiently compute the next highest power of two for a
-- non-negative integer.  If the given value is already a power of
-- two, it is returned unchanged.  If negative, zero is returned.
nextHighestPowerOfTwo :: Int -> Int
nextHighestPowerOfTwo n
#if WORD_SIZE_IN_BITS == 64
  = 1 + _i32
#else
  = 1 + i16
#endif
  where
    -- Classic bit-smearing: OR n-1 with progressively larger shifts of
    -- itself until all bits below the leading one are set, then add 1.
    i0   = n - 1
    i1   = i0 .|. i0 `shiftR` 1
    i2   = i1 .|. i1 `shiftR` 2
    i4   = i2 .|. i2 `shiftR` 4
    i8   = i4 .|. i4 `shiftR` 8
    i16  = i8 .|. i8 `shiftR` 16
    _i32 = i16 .|. i16 `shiftR` 32
-- It could be implemented as
--
-- > nextHighestPowerOfTwo n = 1 + foldl' go (n-1) [1, 2, 4, 8, 16, 32]
-- >   where go m i = m .|. m `shiftR` i
--
-- But GHC do not inline foldl (probably because it's recursive) and
-- as result function walks list of boxed ints. Hand rolled version
-- uses unboxed arithmetic.

-- | Multiply a number by itself.
square :: Double -> Double
square x = x * x

-- | Simple for loop.  Counts from /start/ to /end/-1.
for :: Monad m => Int -> Int -> (Int -> m ()) -> m ()
for n0 !n f = loop n0
  where
    loop i | i == n    = return ()
           | otherwise = f i >> loop (i+1)
{-# INLINE for #-}

-- | Simple reverse-for loop.  Counts from /start/-1 to /end/ (which
-- must be less than /start/).
rfor :: Monad m => Int -> Int -> (Int -> m ()) -> m ()
rfor n0 !n f = loop n0
  where
    loop i | i == n    = return ()
           | otherwise = let i' = i-1 in f i' >> loop i'
{-# INLINE rfor #-}

-- | Apply a function to the element at the given index of a mutable
-- vector.  No bounds checking is performed — the caller must ensure
-- the index is valid.
unsafeModify :: M.MVector s Double -> Int -> (Double -> Double) -> ST s ()
unsafeModify v i f = do
  k <- M.unsafeRead v i
  M.unsafeWrite v i (f k)
{-# INLINE unsafeModify #-}
-------------------------------------------------------------------------------- /Statistics/Internal.hs: --------------------------------------------------------------------------------
-- |
-- Module    : Statistics.Internal
-- Copyright : (c) 2009 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
--
module Statistics.Internal (
    -- * Default definitions for Show
    defaultShow1
  , defaultShow2
  , defaultShow3
    -- * Default definitions for Read
  , defaultReadPrecM1
  , defaultReadPrecM2
  , defaultReadPrecM3
    -- * Reexports
  , Show(..)
  , Read(..)
  ) where

import Control.Applicative
import Control.Monad
import Text.Read


----------------------------------------------------------------
-- Default show implementations
----------------------------------------------------------------

-- Render @Con a@ with standard constructor-application precedence:
-- parenthesize when the enclosing context binds at least as tightly
-- as function application (precedence 11).
defaultShow1 :: (Show a) => String -> a -> Int -> ShowS
defaultShow1 con a n
  = showParen (n >= 11)
    ( showString con
    . showChar ' '
    . showsPrec 11 a
    )

-- Render @Con a b@; same precedence convention as 'defaultShow1'.
defaultShow2 :: (Show a, Show b) => String -> a -> b -> Int -> ShowS
defaultShow2 con a b n
  = showParen (n >= 11)
    ( showString con
    . showChar ' '
    . showsPrec 11 a
    . showChar ' '
    . showsPrec 11 b
    )

-- Render @Con a b c@; same precedence convention as 'defaultShow1'.
defaultShow3 :: (Show a, Show b, Show c)
             => String -> a -> b -> c -> Int -> ShowS
defaultShow3 con a b c n
  = showParen (n >= 11)
    ( showString con
    . showChar ' '
    . showsPrec 11 a
    . showChar ' '
    . showsPrec 11 b
    . showChar ' '
    . showsPrec 11 c
    )

----------------------------------------------------------------
-- Default read implementations
----------------------------------------------------------------

-- Parse @Con a@ and validate via the smart constructor @f@; the parse
-- fails ('empty') when validation returns 'Nothing'.
defaultReadPrecM1 :: (Read a) => String -> (a -> Maybe r) -> ReadPrec r
defaultReadPrecM1 con f = parens $ prec 10 $ do
  expect con
  a <- readPrec
  maybe empty return $ f a

defaultReadPrecM2 :: (Read a, Read b) => String -> (a -> b -> Maybe r) -> ReadPrec r
defaultReadPrecM2 con f = parens $ prec 10 $ do
  expect con
  a <- readPrec
  b <- readPrec
  maybe empty return $ f a b

defaultReadPrecM3 :: (Read a, Read b, Read c)
                  => String -> (a -> b -> c -> Maybe r) -> ReadPrec r
defaultReadPrecM3 con f = parens $ prec 10 $ do
  expect con
  a <- readPrec
  b <- readPrec
  c <- readPrec
  maybe empty return $ f a b c

-- Consume exactly the given identifier token; fail otherwise.
expect :: String -> ReadPrec ()
expect str = do
  Ident s <- lexP
  guard (s == str)
-------------------------------------------------------------------------------- /Statistics/Resampling/Bootstrap.hs: --------------------------------------------------------------------------------
-- |
-- Module    : Statistics.Resampling.Bootstrap
-- Copyright : (c) 2009, 2011 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- The bootstrap method for statistical inference.
11 | 12 | module Statistics.Resampling.Bootstrap 13 | ( bootstrapBCA 14 | , basicBootstrap 15 | -- * References 16 | -- $references 17 | ) where 18 | 19 | import Data.Vector.Generic ((!)) 20 | import qualified Data.Vector.Unboxed as U 21 | import qualified Data.Vector.Generic as G 22 | 23 | import Statistics.Distribution (cumulative, quantile) 24 | import Statistics.Distribution.Normal 25 | import Statistics.Resampling (Bootstrap(..), jackknife) 26 | import Statistics.Sample (mean) 27 | import Statistics.Types (Sample, CL, Estimate, ConfInt, estimateFromInterval, 28 | estimateFromErr, CL, significanceLevel) 29 | import Statistics.Function (gsort) 30 | 31 | import qualified Statistics.Resampling as R 32 | 33 | import Control.Parallel.Strategies (parMap, rdeepseq) 34 | 35 | data T = {-# UNPACK #-} !Double :< {-# UNPACK #-} !Double 36 | infixl 2 :< 37 | 38 | -- | Bias-corrected accelerated (BCA) bootstrap. This adjusts for both 39 | -- bias and skewness in the resampled distribution. 40 | -- 41 | -- BCA algorithm is described in ch. 5 of Davison, Hinkley "Confidence 42 | -- intervals" in section 5.3 "Percentile method" 43 | bootstrapBCA 44 | :: CL Double -- ^ Confidence level 45 | -> Sample -- ^ Full data sample 46 | -> [(R.Estimator, Bootstrap U.Vector Double)] 47 | -- ^ Estimates obtained from resampled data and estimator used for 48 | -- this. 49 | -> [Estimate ConfInt Double] 50 | bootstrapBCA confidenceLevel sample resampledData 51 | = parMap rdeepseq e resampledData 52 | where 53 | e (est, Bootstrap pt resample) 54 | | U.length sample == 1 || isInfinite bias = 55 | estimateFromErr pt (0,0) confidenceLevel 56 | | otherwise = 57 | estimateFromInterval pt (resample ! lo, resample ! 
hi) confidenceLevel 58 | where 59 | -- Quantile estimates for given CL 60 | lo = min (max (cumn a1) 0) (ni - 1) 61 | where a1 = bias + b1 / (1 - accel * b1) 62 | b1 = bias + z1 63 | hi = max (min (cumn a2) (ni - 1)) 0 64 | where a2 = bias + b2 / (1 - accel * b2) 65 | b2 = bias - z1 66 | -- Number of resamples 67 | ni = U.length resample 68 | n = fromIntegral ni 69 | -- Corrections 70 | z1 = quantile standard (significanceLevel confidenceLevel / 2) 71 | cumn = round . (*n) . cumulative standard 72 | bias = quantile standard (probN / n) 73 | where probN = fromIntegral . U.length . U.filter ( CL Double -- ^ Confidence vector 88 | -> Bootstrap v a -- ^ Estimate from full sample and vector of 89 | -- estimates obtained from resamples 90 | -> Estimate ConfInt a 91 | {-# INLINE basicBootstrap #-} 92 | basicBootstrap cl (Bootstrap e ests) 93 | = estimateFromInterval e (sorted ! lo, sorted ! hi) cl 94 | where 95 | sorted = gsort ests 96 | n = fromIntegral $ G.length ests 97 | c = n * (significanceLevel cl / 2) 98 | -- FIXME: can we have better estimates of quantiles in case when p 99 | -- is not multiple of 1/N 100 | -- 101 | -- FIXME: we could have undercoverage here 102 | lo = round c 103 | hi = truncate (n - c) 104 | 105 | -- $references 106 | -- 107 | -- * Davison, A.C; Hinkley, D.V. (1997) Bootstrap methods and their 108 | -- application. 109 | -------------------------------------------------------------------------------- /Statistics/Sample/Histogram.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts, BangPatterns, ScopedTypeVariables #-} 2 | 3 | -- | 4 | -- Module : Statistics.Sample.Histogram 5 | -- Copyright : (c) 2011 Bryan O'Sullivan 6 | -- License : BSD3 7 | -- 8 | -- Maintainer : bos@serpentine.com 9 | -- Stability : experimental 10 | -- Portability : portable 11 | -- 12 | -- Functions for computing histograms of sample data. 

module Statistics.Sample.Histogram
    (
      histogram
    -- * Building blocks
    , histogram_
    , range
    ) where

import Control.Monad.ST
import Numeric.MathFunctions.Constants (m_epsilon,m_tiny)
import Statistics.Function (minMax)
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Generic.Mutable as GM

-- | /O(n)/ Compute a histogram over a data set.
--
-- The result consists of a pair of vectors:
--
-- * The lower bound of each interval.
--
-- * The number of samples within the interval.
--
-- Interval (bin) sizes are uniform, and the upper and lower bounds
-- are chosen automatically using the 'range' function.  To specify
-- these parameters directly, use the 'histogram_' function.
histogram :: (G.Vector v0 Double, G.Vector v1 Double, Num b, G.Vector v1 b) =>
             Int        -- ^ Number of bins (must be positive).
          -> v0 Double  -- ^ Sample data (cannot be empty).
          -> (v1 Double, v1 b)
histogram numBins xs = (G.generate numBins step, histogram_ numBins lo hi xs)
  where (lo,hi) = range numBins xs
        step i  = lo + d * fromIntegral i
        d       = (hi - lo) / fromIntegral numBins
{-# INLINE histogram #-}

-- | /O(n)/ Compute a histogram over a data set.
--
-- Interval (bin) sizes are uniform, based on the supplied upper
-- and lower bounds.
histogram_ :: forall b a v0 v1. (Num b, RealFrac a, G.Vector v0 a, G.Vector v1 b) =>
              Int
           -- ^ Number of bins.  This value must be positive.  A zero
           --   or negative value will cause an error.
           -> a
           -- ^ Lower bound on interval range.  Sample data less than
           --   this will cause an error.
           -> a
           -- ^ Upper bound on interval range.  This value must not be
           --   less than the lower bound.  Sample data that falls above
           --   the upper bound will cause an error.
           -> v0 a
           -- ^ Sample data.
           -> v1 b
histogram_ numBins lo hi xs0 = G.create (GM.replicate numBins 0 >>= bin xs0)
  where
    bin :: forall s. v0 a -> G.Mutable v1 s b -> ST s (G.Mutable v1 s b)
    bin xs bins = go 0
      where
        go i | i >= len = return bins
             | otherwise = do
                 let x = xs `G.unsafeIndex` i
                     b = truncate $ (x - lo) / d
                 -- GM.read/GM.write are bounds-checked, so out-of-range
                 -- samples raise an error here rather than corrupting memory.
                 write' bins b . (+1) =<< GM.read bins b
                 go (i+1)
        write' bins' b !e = GM.write bins' b e
        len = G.length xs
        -- Bin width is widened by one epsilon so that a sample exactly
        -- equal to 'hi' lands in the last bin instead of indexing one
        -- past the end.
        d = ((hi - lo) / fromIntegral numBins) * (1 + realToFrac m_epsilon)
{-# INLINE histogram_ #-}

-- | /O(n)/ Compute decent defaults for the lower and upper bounds of
-- a histogram, based on the desired number of bins and the range of
-- the sample data.
--
-- The upper and lower bounds used are @(lo-d, hi+d)@, where
--
-- @d = (maximum sample - minimum sample) / ((bins - 1) * 2)@
--
-- If all elements in the sample are the same and equal to @x@ range
-- is set to @(x - |x|\/10, x + |x|\/10)@.  And if @x@ is equal to 0 range
-- is set to @(-1,1)@.  This is needed to avoid creating histogram with
-- zero bin size.
range :: (G.Vector v Double) =>
         Int       -- ^ Number of bins (must be positive).
      -> v Double  -- ^ Sample data (cannot be empty).
      -> (Double, Double)
range numBins xs
  | numBins < 1 = error "Statistics.Histogram.range: invalid bin count"
  | G.null xs   = error "Statistics.Histogram.range: empty sample"
  | lo == hi    = case abs lo / 10 of
                    a | a < m_tiny -> (-1,1)
                      | otherwise  -> (lo - a, lo + a)
  | otherwise   = (lo-d, hi+d)
  where
    d | numBins == 1 = 0
      | otherwise    = (hi - lo) / ((fromIntegral numBins - 1) * 2)
    (lo,hi) = minMax xs
{-# INLINE range #-}
-------------------------------------------------------------------------------- /Statistics/Sample/Internal.hs: --------------------------------------------------------------------------------
{-# LANGUAGE FlexibleContexts #-}

-- |
-- Module    : Statistics.Sample.Internal
-- Copyright : (c) 2013 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- Internal functions for computing over samples.
module Statistics.Sample.Internal
    (
      robustSumVar
    , sum
    ) where

import Numeric.Sum (kbn, sumVector)
import Prelude hiding (sum)
import Statistics.Function (square)
import qualified Data.Vector.Generic as G

-- Sum of squared deviations from @m@, using the compensated 'sum' below.
robustSumVar :: (G.Vector v Double) => Double -> v Double -> Double
robustSumVar m = sum . G.map (square . subtract m)
{-# INLINE robustSumVar #-}

-- Kahan-Babuška-Neumaier compensated summation (via 'Numeric.Sum.kbn').
sum :: (G.Vector v Double) => v Double -> Double
sum = sumVector kbn
{-# INLINE sum #-}
-------------------------------------------------------------------------------- /Statistics/Sample/KernelDensity.hs: --------------------------------------------------------------------------------
{-# LANGUAGE BangPatterns, FlexibleContexts, UnboxedTuples #-}
-- |
-- Module    : Statistics.Sample.KernelDensity
-- Copyright : (c) 2011 Bryan O'Sullivan
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
-- Kernel density estimation.  This module provides a fast, robust,
-- non-parametric way to estimate the probability density function of
-- a sample.
--
-- This estimator does not use the commonly employed \"Gaussian rule
-- of thumb\".  As a result, it outperforms many plug-in methods on
-- multimodal samples with widely separated modes.

module Statistics.Sample.KernelDensity
    (
    -- * Estimation functions
      kde
    , kde_
    -- * References
    -- $references
    ) where

import Data.Default.Class
import Numeric.MathFunctions.Constants (m_sqrt_2_pi)
import Numeric.RootFinding (fromRoot, ridders, RiddersParam(..), Tolerance(..))
import Prelude hiding (const, min, max, sum)
import Statistics.Function (minMax, nextHighestPowerOfTwo)
import Statistics.Sample.Histogram (histogram_)
import Statistics.Sample.Internal (sum)
import Statistics.Transform (CD, dct, idct)
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector as V


-- | Gaussian kernel density estimator for one-dimensional data, using
-- the method of Botev et al.
--
-- The result is a pair of vectors, containing:
--
-- * The coordinates of each mesh point.  The mesh interval is chosen
--   to be 20% larger than the range of the sample.  (To specify the
--   mesh interval, use 'kde_'.)
--
-- * Density estimates at each mesh point.
kde :: (G.Vector v CD, G.Vector v Double, G.Vector v Int)
    => Int
    -- ^ The number of mesh points to use in the uniform discretization
    --   of the interval @(min,max)@.  If this value is not a power of
    --   two, then it is rounded up to the next power of two.
    -> v Double -> (v Double, v Double)
kde n0 xs = kde_ n0 (lo - range / 10) (hi + range / 10) xs
  where
    (lo,hi) = minMax xs
    range | G.length xs <= 1 = 1 -- Unreasonable guess
          | lo == hi         = 1 -- All elements are equal
          | otherwise        = hi - lo
{-# INLINABLE kde #-}
-- FIX: these pragmas previously read @SPECIAlIZE@ (lowercase 'l'),
-- which GHC reports as an unrecognised pragma and ignores, so the
-- specializations never fired.
{-# SPECIALIZE kde :: Int -> U.Vector Double -> (U.Vector Double, U.Vector Double) #-}
{-# SPECIALIZE kde :: Int -> V.Vector Double -> (V.Vector Double, V.Vector Double) #-}


-- | Gaussian kernel density estimator for one-dimensional data, using
-- the method of Botev et al.
--
-- The result is a pair of vectors, containing:
--
-- * The coordinates of each mesh point.
--
-- * Density estimates at each mesh point.
kde_ :: (G.Vector v CD, G.Vector v Double, G.Vector v Int)
     => Int
     -- ^ The number of mesh points to use in the uniform discretization
     --   of the interval @(min,max)@.  If this value is not a power of
     --   two, then it is rounded up to the next power of two.
     -> Double
     -- ^ Lower bound (@min@) of the mesh range.
     -> Double
     -- ^ Upper bound (@max@) of the mesh range.
     -> v Double
     -> (v Double, v Double)
kde_ n0 min max xs
  | G.null xs = error "Statistics.KernelDensity.kde: empty sample"
  | n0 <= 1   = error "Statistics.KernelDensity.kde: invalid number of points"
  | otherwise = (mesh, density)
  where
    -- Uniform mesh of 'ni' points covering [min,max].
    mesh = G.generate ni $ \z -> min + (d * fromIntegral z)
      where d = r / (n-1)
    -- Density: inverse DCT of the DCT coefficients damped by a Gaussian
    -- with the plug-in bandwidth t_star.
    density = G.map (/(2 * r)) . idct $ G.zipWith f a (G.enumFromTo 0 (n-1))
      where f b z = b * exp (sqr z * sqr pi * t_star * (-0.5))
    !n  = fromIntegral ni
    !ni = nextHighestPowerOfTwo n0
    !r  = max - min
    -- DCT of the normalized histogram of the sample.
    a   = dct . G.map (/ sum h) $ h
      where h = G.map (/ len) $ histogram_ ni min max xs
    !len = fromIntegral (G.length xs)
    -- Fixed-point bandwidth selection (Botev et al., 2010).  Falls back
    -- to the rule-of-thumb 0.28 * len**(-0.4) when Ridders' method fails
    -- to bracket a root.
    !t_star = fromRoot (0.28 * len ** (-0.4)) . ridders def{ riddersTol = AbsTol 1e-14 } (0,0.1)
            $ \x -> x - (len * (2 * sqrt pi) * go 6 (f 7 x)) ** (-0.4)
      where
        f q t = 2 * pi ** (q*2) * sum (G.zipWith g iv a2v)
          where g i a2 = i ** q * a2 * exp ((-i) * sqr pi * t)
                a2v = G.map (sqr . (*0.5)) $ G.tail a
                iv  = G.map sqr $ G.enumFromTo 1 (n-1)
        go s !h | s == 1    = h
                | otherwise = go (s-1) (f s time)
          where time  = (2 * const * k0 / len / h) ** (2 / (3 + 2 * s))
                const = (1 + 0.5 ** (s+0.5)) / 3
                k0    = U.product (G.enumFromThenTo 1 3 (2*s-1)) / m_sqrt_2_pi
    sqr x = x * x
{-# INLINABLE kde_ #-}
-- FIX: same @SPECIAlIZE@ -> @SPECIALIZE@ typo as above.
{-# SPECIALIZE kde_ :: Int -> Double -> Double -> U.Vector Double -> (U.Vector Double, U.Vector Double) #-}
{-# SPECIALIZE kde_ :: Int -> Double -> Double -> V.Vector Double -> (V.Vector Double, V.Vector Double) #-}


-- $references
--
-- Botev. Z.I., Grotowski J.F., Kroese D.P. (2010). Kernel density
-- estimation via diffusion. /Annals of Statistics/
-- 38(5):2916–2957.

-------------------------------------------------------------------------------- /Statistics/Sample/Normalize.hs: --------------------------------------------------------------------------------
{-# LANGUAGE FlexibleContexts #-}

-- |
-- Module    : Statistics.Sample.Normalize
-- Copyright : (c) 2017 Gregory W. Schwartz
-- License   : BSD3
--
-- Maintainer  : gsch@mail.med.upenn.edu
-- Stability   : experimental
-- Portability : portable
--
-- Functions for normalizing samples.

module Statistics.Sample.Normalize
    (
      standardize
    ) where

import Statistics.Sample
import qualified Data.Vector.Generic as G
import qualified Data.Vector as V
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Storable as S

-- | /O(n)/ Normalize a sample using standard scores:
--
-- \[ z = \frac{x - \mu}{\sigma} \]
--
-- Where μ is sample mean and σ is standard deviation computed from
-- unbiased variance estimation.  If sample to small to compute σ or
-- it's equal to 0 @Nothing@ is returned.
standardize :: (G.Vector v Double) => v Double -> Maybe (v Double)
standardize xs
  | G.length xs >= 2 && sigma /= 0 = Just scaled
  | otherwise                      = Nothing
  where
    mu     = mean xs
    sigma  = stdDev xs
    -- z-score every element; only forced when both guards hold.
    scaled = G.map (\x -> (x - mu) / sigma) xs
{-# INLINABLE standardize #-}
{-# SPECIALIZE standardize :: V.Vector Double -> Maybe (V.Vector Double) #-}
{-# SPECIALIZE standardize :: U.Vector Double -> Maybe (U.Vector Double) #-}
{-# SPECIALIZE standardize :: S.Vector Double -> Maybe (S.Vector Double) #-}
-------------------------------------------------------------------------------- /Statistics/Test/ChiSquared.hs: --------------------------------------------------------------------------------
{-# LANGUAGE FlexibleContexts #-}
-- | Pearson's chi squared test.
module Statistics.Test.ChiSquared (
    chi2test
  , chi2testCont
  , module Statistics.Test.Types
  ) where

import Prelude hiding (sum)

import Statistics.Distribution
import Statistics.Distribution.ChiSquared
import Statistics.Function (square)
import Statistics.Sample.Internal (sum)
import Statistics.Test.Types
import Statistics.Types
import qualified Data.Vector as V
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U



-- | Generic form of Pearson chi squared tests for binned data.  Data
-- sample is supplied in form of tuples (observed quantity,
-- expected number of events).  Both must be positive.
--
-- This test should be used only if all bins have expected values of
-- at least 5.
chi2test :: (G.Vector v (Int,Double), G.Vector v Double)
         => Int            -- ^ Number of additional degrees of
                           --   freedom.  One degree of freedom
                           --   is due to the fact that there are
                           --   N observations in total and
                           --   accounted for automatically.
         -> v (Int,Double) -- ^ Observation and expectation.
         -> Maybe (Test ChiSquared)
chi2test ndf vec
  | ndf < 0   = error $ "Statistics.Test.ChiSquare.chi2test: negative NDF " ++ show ndf
  -- Returns Nothing when there are not enough bins for the requested
  -- degrees of freedom (n <= 0).
  | n > 0     = Just Test
      { testSignificance = mkPValue $ complCumulative d chi2
      , testStatistics   = chi2
      , testDistribution = chiSquared n
      }
  | otherwise = Nothing
  where
    n    = G.length vec - ndf - 1
    chi2 = sum $ G.map (\(o,e) -> square (fromIntegral o - e) / e) vec
    d    = chiSquared n
{-# INLINABLE chi2test #-}
{-# SPECIALIZE
    chi2test :: Int -> U.Vector (Int,Double) -> Maybe (Test ChiSquared) #-}
{-# SPECIALIZE
    chi2test :: Int -> V.Vector (Int,Double) -> Maybe (Test ChiSquared) #-}


-- | Chi squared test for data with normal errors.  Data is supplied in
-- form of pair (observation with error, and expectation).
chi2testCont
  :: (G.Vector v (Estimate NormalErr Double, Double), G.Vector v Double)
  => Int                                    -- ^ Number of additional
                                            --   degrees of freedom.
  -> v (Estimate NormalErr Double, Double)  -- ^ Observation and expectation.
  -> Maybe (Test ChiSquared)
chi2testCont ndf vec
  | ndf < 0   = error $ "Statistics.Test.ChiSquare.chi2testCont: negative NDF " ++ show ndf
  | n > 0     = Just Test
      { testSignificance = mkPValue $ complCumulative d chi2
      , testStatistics   = chi2
      , testDistribution = chiSquared n
      }
  | otherwise = Nothing
  where
    n    = G.length vec - ndf - 1
    -- NOTE(review): this divides by the error 's' once; a Pearson χ²
    -- with normal errors is Σ((o−e)/σ)², i.e. division by σ².  Verify
    -- whether 'NormalErr' stores the variance here — if it stores the
    -- standard error, this looks like a bug.
    chi2 = sum $ G.map (\(Estimate o (NormalErr s),e) -> square (o - e) / s) vec
    d    = chiSquared n
-------------------------------------------------------------------------------- /Statistics/Test/Internal.hs: --------------------------------------------------------------------------------
{-# LANGUAGE FlexibleContexts #-}
module Statistics.Test.Internal (
    rank
  , rankUnsorted
  , splitByTags
  ) where

import Data.Ord
import Data.Vector.Generic ((!))
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import qualified Data.Vector.Generic.Mutable as M
import Statistics.Function


-- Private data type for unfolding
data Rank v a = Rank {
    rankCnt :: {-# UNPACK #-} !Int    -- Number of ranks to return
  , rankVal :: {-# UNPACK #-} !Double -- Rank to return
  , rankNum :: {-# UNPACK #-} !Double -- Current rank
  , rankVec :: v a                    -- Remaining vector
  }

-- | Calculate rank of every element of sample.  In case of ties ranks
-- are averaged.  Sample should be already sorted in ascending order.
--
-- Rank is index of element in the sample, numeration starts from 1.
-- In case of ties average of ranks of equal elements is assigned
-- to each
--
-- >>> import qualified Data.Vector.Unboxed as VU
-- >>> rank (==) (VU.fromList [10,20,30::Int])
-- [1.0,2.0,3.0]
--
-- >>> rank (==) (VU.fromList [10,10,10,30::Int])
-- [2.0,2.0,2.0,4.0]
rank :: (G.Vector v a)
     => (a -> a -> Bool)    -- ^ Equivalence relation
     -> v a                 -- ^ Vector to rank
     -> U.Vector Double
rank eq vec = G.unfoldr go (Rank 0 (-1) 1 vec)
  where
    -- rankCnt == 0: start a fresh group of equal elements.
    go (Rank 0 _ r v)
      | G.null v  = Nothing
      | otherwise =
          case G.length h of
            1 -> Just (r, Rank 0 0 (r+1) rest)
            -- A tie group of n elements: each gets the average of the
            -- ranks r .. r+n-1, i.e. (r*2 + (n-1)) / 2.
            n -> go Rank { rankCnt = n
                         , rankVal = 0.5 * (r*2 + fromIntegral (n-1))
                         , rankNum = r + fromIntegral n
                         , rankVec = rest
                         }
      where
        (h,rest) = G.span (eq $ G.head v) v
    -- Emit the shared rank for the remaining members of a tie group.
    go (Rank n val r v) = Just (val, Rank (n-1) val r v)
{-# INLINE rank #-}

-- | Compute rank of every element of vector.  Unlike rank it doesn't
-- require sample to be sorted.
rankUnsorted :: ( Ord a
                , G.Vector v a
                , G.Vector v Int
                , G.Vector v (Int, a)
                )
             => v a
             -> U.Vector Double
rankUnsorted xs = G.create $ do
    -- Put ranks into their original positions
    -- NOTE: backpermute will do wrong thing
    vec <- M.new n
    for 0 n $ \i ->
      M.unsafeWrite vec (index ! i) (ranks ! i)
    return vec
  where
    n = G.length xs
    -- Calculate ranks for sorted array
    ranks = rank (==) sorted
    -- Sort vector and retain original indices of elements
    (index, sorted)
      = G.unzip
      $ sortBy (comparing snd)
      $ indexed xs
{-# INLINE rankUnsorted #-}


-- | Split tagged vector
splitByTags :: (G.Vector v a, G.Vector v (Bool,a)) => v (Bool,a) -> (v a, v a)
splitByTags vs = (G.map snd a, G.map snd b)
  where
    (a,b) = G.unstablePartition fst vs
{-# INLINE splitByTags #-}
-------------------------------------------------------------------------------- /Statistics/Test/KruskalWallis.hs: --------------------------------------------------------------------------------
-- |
-- Module    : Statistics.Test.KruskalWallis
-- Copyright : (c) 2014 Danny Navarro
-- License   : BSD3
--
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : portable
--
module Statistics.Test.KruskalWallis
  ( -- * Kruskal-Wallis test
    kruskalWallisTest
    -- ** Building blocks
  , kruskalWallisRank
  , kruskalWallis
  , module Statistics.Test.Types
  ) where

import Data.Ord (comparing)
import qualified Data.Vector.Unboxed as U
import Statistics.Function (sort, sortBy, square)
import Statistics.Distribution (complCumulative)
import Statistics.Distribution.ChiSquared (chiSquared)
import Statistics.Types
import Statistics.Test.Types
import Statistics.Test.Internal (rank)
import Statistics.Sample
import qualified Statistics.Sample.Internal as Sample(sum)


-- | Kruskal-Wallis ranking.
--
-- All values are replaced by the absolute rank in the combined samples.
--
-- The samples and values need not to be ordered but the values in the result
-- are ordered.  Assigned ranks (ties are given their average rank).
kruskalWallisRank :: (U.Unbox a, Ord a) => [U.Vector a] -> [U.Vector Double]
kruskalWallisRank samples = groupByTags
                          . sortBy (comparing fst)
                          . U.zip tags
                          $ rank (==) joinSample
  where
    -- Tag every value with the index of the sample it came from, pool
    -- everything, and sort by value so 'rank' sees a sorted vector.
    (tags,joinSample) = U.unzip
                      . sortBy (comparing snd)
                      $ foldMap (uncurry tagSample) $ zip [(1::Int)..] samples
    tagSample t = U.map (\x -> (t,x))

    -- Regroup the ranked values back into per-sample vectors by tag.
    groupByTags xs
      | U.null xs = []
      | otherwise = sort (U.map snd ys) : groupByTags zs
      where
        (ys,zs) = U.span ((==) (fst $ U.head xs) . fst) xs


-- | The Kruskal-Wallis Test.
--
-- In textbooks the output value is usually represented by 'K' or 'H'.  This
-- function already does the ranking.
kruskalWallis :: (U.Unbox a, Ord a) => [U.Vector a] -> Double
kruskalWallis samples = (nTot - 1) * numerator / denominator
  where
    -- Total number of elements in all samples
    nTot = fromIntegral $ sumWith rsamples U.length
    -- Average rank of all samples
    avgRank = (nTot + 1) / 2
    --
    numerator = sumWith rsamples $ \sample ->
        let n = fromIntegral $ U.length sample
        in  n * square (mean sample - avgRank)
    denominator = sumWith rsamples $ \sample ->
        Sample.sum $ U.map (\r -> square (r - avgRank)) sample

    rsamples = kruskalWallisRank samples


-- | Perform Kruskal-Wallis Test for the given samples and required
-- significance.  For additional information check 'kruskalWallis'.  This is just
-- a helper function.
--
-- It uses /Chi-Squared/ distribution for approximation as long as the sizes are
-- larger than 5.  Otherwise the test returns 'Nothing'.
kruskalWallisTest :: (Ord a, U.Unbox a) => [U.Vector a] -> Maybe (Test ())
kruskalWallisTest [] = Nothing
kruskalWallisTest samples
  -- We use chi-squared approximation here.  (>4) means every sample
  -- must have at least 5 elements.
  | all (>4) ns = Just Test { testSignificance = mkPValue $ complCumulative d k
                            , testStatistics   = k
                            , testDistribution = ()
                            }
  | otherwise   = Nothing
  where
    k  = kruskalWallis samples
    ns = map U.length samples
    d  = chiSquared (length ns - 1)

-- * Helper functions

-- Sum of @f@ applied to every sample.
sumWith :: Num a => [Sample] -> (Sample -> a) -> a
sumWith samples f = Prelude.sum $ fmap f samples
{-# INLINE sumWith #-}
-------------------------------------------------------------------------------- /Statistics/Test/Runs.hs: --------------------------------------------------------------------------------
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleContexts #-}
-- | Goodness of fit runs tests.
module Statistics.Test.Runs (
    runsTest
  , module Statistics.Test.Types
  ) where

import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U
import Numeric.SpecFunctions (choose)
import Prelude hiding (sum)

import Statistics.Sample.Internal (sum)
import Statistics.Test.Types
import Statistics.Types


-- | Goodness of fit test for binned data.  It uses only sign of
-- deviations of observations from their expectations.  Null
-- hypothesis is that all possible patterns of sign occurrences are
-- equiprobable.
--
-- It's asymptotically independent from chi-square test.  So their
So their 25 | -- results could be directly combined. 26 | runsTest :: (G.Vector v Bool) => v Bool -> Test () 27 | {-# INLINE runsTest #-} 28 | runsTest v 29 | = Test { testSignificance = mkPValue $ cumulativeProb n m r 30 | , testStatistics = fromIntegral r 31 | , testDistribution = () 32 | } 33 | where 34 | (n,m,r) = computeRuns v 35 | 36 | -- Compute number of positive elements, negative elements and runs 37 | computeRuns :: (G.Vector v Bool) => v Bool -> (Int,Int,Int) 38 | {-# INLINE computeRuns #-} 39 | computeRuns v 40 | = fini $ G.foldl' step (0,0,0,Nothing) v 41 | where 42 | step (!nP,!nM,!nR,!old) f = 43 | ( if f then nP+1 else nP 44 | , if f then nM else nM+1 45 | , if old == Just f then nR else nR+1 46 | , Just f 47 | ) 48 | fini (nP,nM,nR,_) = (nP,nM,nR) 49 | 50 | -- Compute denormalized probability of getting R runs given N positive 51 | -- and M negative elements 52 | denormProbability :: Int -> Int -> Int -> Double 53 | denormProbability n m r 54 | | even r = 2 * ((m-1) `choose` (s-1)) * ((n-1) `choose` (s-1)) 55 | | otherwise = ((m-1) `choose` (s-1)) * ((n-1) `choose` (s-2)) 56 | + ((m-1) `choose` (s-2)) * ((n-1) `choose` (s-1)) 57 | where 58 | s = r `quot` 2 59 | 60 | -- Probability of getting R<=R[observed] 61 | cumulativeProb :: Int -> Int -> Int -> Double 62 | cumulativeProb n m r 63 | = min 1 64 | $ sum (U.map (denormProbability n m) $ U.enumFromTo 1 r) 65 | / ((n+m) `choose` m) 66 | -------------------------------------------------------------------------------- /Statistics/Test/StudentT.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts, Rank2Types, ScopedTypeVariables #-} 2 | -- | Student's T-test is for assessing whether two samples have 3 | -- different means. This module contains several variations of 4 | -- T-test. These are parametric tests and assume that samples are 5 | -- normally distributed.
6 | module Statistics.Test.StudentT 7 | ( 8 | studentTTest 9 | , welchTTest 10 | , pairedTTest 11 | , module Statistics.Test.Types 12 | ) where 13 | 14 | import Statistics.Distribution hiding (mean) 15 | import Statistics.Distribution.StudentT 16 | import Statistics.Sample (mean, varianceUnbiased) 17 | import Statistics.Test.Types 18 | import Statistics.Types (mkPValue,PValue) 19 | import Statistics.Function (square) 20 | import qualified Data.Vector.Generic as G 21 | import qualified Data.Vector.Unboxed as U 22 | import qualified Data.Vector.Storable as S 23 | import qualified Data.Vector as V 24 | 25 | 26 | 27 | -- | Two-sample Student's t-test. It assumes that both samples are 28 | -- normally distributed and have same variance. Returns @Nothing@ if 29 | -- sample sizes are not sufficient. 30 | studentTTest :: (G.Vector v Double) 31 | => PositionTest -- ^ one- or two-tailed test 32 | -> v Double -- ^ Sample A 33 | -> v Double -- ^ Sample B 34 | -> Maybe (Test StudentT) 35 | studentTTest test sample1 sample2 36 | | G.length sample1 < 2 || G.length sample2 < 2 = Nothing 37 | | otherwise = Just Test 38 | { testSignificance = significance test t ndf 39 | , testStatistics = t 40 | , testDistribution = studentT ndf 41 | } 42 | where 43 | (t, ndf) = tStatistics True sample1 sample2 44 | {-# INLINABLE studentTTest #-} 45 | {-# SPECIALIZE studentTTest :: PositionTest -> U.Vector Double -> U.Vector Double -> Maybe (Test StudentT) #-} 46 | {-# SPECIALIZE studentTTest :: PositionTest -> S.Vector Double -> S.Vector Double -> Maybe (Test StudentT) #-} 47 | {-# SPECIALIZE studentTTest :: PositionTest -> V.Vector Double -> V.Vector Double -> Maybe (Test StudentT) #-} 48 | 49 | -- | Two-sample Welch's t-test. It assumes that both samples are 50 | -- normally distributed but doesn't assume that they have same 51 | -- variance. Returns @Nothing@ if sample sizes are not sufficient. 
52 | welchTTest :: (G.Vector v Double) 53 | => PositionTest -- ^ one- or two-tailed test 54 | -> v Double -- ^ Sample A 55 | -> v Double -- ^ Sample B 56 | -> Maybe (Test StudentT) 57 | welchTTest test sample1 sample2 58 | | G.length sample1 < 2 || G.length sample2 < 2 = Nothing 59 | | otherwise = Just Test 60 | { testSignificance = significance test t ndf 61 | , testStatistics = t 62 | , testDistribution = studentT ndf 63 | } 64 | where 65 | (t, ndf) = tStatistics False sample1 sample2 66 | {-# INLINABLE welchTTest #-} 67 | {-# SPECIALIZE welchTTest :: PositionTest -> U.Vector Double -> U.Vector Double -> Maybe (Test StudentT) #-} 68 | {-# SPECIALIZE welchTTest :: PositionTest -> S.Vector Double -> S.Vector Double -> Maybe (Test StudentT) #-} 69 | {-# SPECIALIZE welchTTest :: PositionTest -> V.Vector Double -> V.Vector Double -> Maybe (Test StudentT) #-} 70 | 71 | -- | Paired two-sample t-test. Two samples are paired in a 72 | -- within-subject design. Returns @Nothing@ if sample size is not 73 | -- sufficient. 74 | pairedTTest :: forall v. 
(G.Vector v (Double, Double)) 75 | => PositionTest -- ^ one- or two-tailed test 76 | -> v (Double, Double) -- ^ paired samples 77 | -> Maybe (Test StudentT) 78 | pairedTTest test sample 79 | | G.length sample < 2 = Nothing 80 | | otherwise = Just Test 81 | { testSignificance = significance test t ndf 82 | , testStatistics = t 83 | , testDistribution = studentT ndf 84 | } 85 | where 86 | (t, ndf) = tStatisticsPaired sample 87 | {-# INLINABLE pairedTTest #-} 88 | {-# SPECIALIZE pairedTTest :: PositionTest -> U.Vector (Double,Double) -> Maybe (Test StudentT) #-} 89 | {-# SPECIALIZE pairedTTest :: PositionTest -> V.Vector (Double,Double) -> Maybe (Test StudentT) #-} 90 | 91 | 92 | ------------------------------------------------------------------------------- 93 | 94 | significance :: PositionTest -- ^ one- or two-tailed 95 | -> Double -- ^ t statistics 96 | -> Double -- ^ degree of freedom 97 | -> PValue Double -- ^ p-value 98 | significance test t df = 99 | case test of 100 | -- Here we exploit symmetry of T-distribution and calculate small tail 101 | SamplesDiffer -> mkPValue $ 2 * tailArea (negate (abs t)) 102 | AGreater -> mkPValue $ tailArea (negate t) 103 | BGreater -> mkPValue $ tailArea t 104 | where 105 | tailArea = cumulative (studentT df) 106 | 107 | 108 | -- Calculate T statistics for two samples 109 | tStatistics :: (G.Vector v Double) 110 | => Bool -- variance equality 111 | -> v Double 112 | -> v Double 113 | -> (Double, Double) 114 | {-# INLINE tStatistics #-} 115 | tStatistics varequal sample1 sample2 = (t, ndf) 116 | where 117 | -- t-statistics 118 | t = (m1 - m2) / sqrt ( 119 | if varequal 120 | then ((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 + n2 - 2) * (1 / n1 + 1 / n2) 121 | else s1 / n1 + s2 / n2) 122 | 123 | -- degree of freedom 124 | ndf | varequal = n1 + n2 - 2 125 | | otherwise = square (s1 / n1 + s2 / n2) 126 | / (square s1 / (square n1 * (n1 - 1)) + square s2 / (square n2 * (n2 - 1))) 127 | -- statistics of two samples 128 | n1 = fromIntegral $ 
G.length sample1 129 | n2 = fromIntegral $ G.length sample2 130 | m1 = mean sample1 131 | m2 = mean sample2 132 | s1 = varianceUnbiased sample1 133 | s2 = varianceUnbiased sample2 134 | 135 | 136 | -- Calculate T-statistics for paired sample 137 | tStatisticsPaired :: (G.Vector v (Double, Double)) 138 | => v (Double, Double) 139 | -> (Double, Double) 140 | {-# INLINE tStatisticsPaired #-} 141 | tStatisticsPaired sample = (t, ndf) 142 | where 143 | -- t-statistics 144 | t = let d = U.map (uncurry (-)) $ G.convert sample 145 | sumd = U.sum d 146 | in sumd / sqrt ((n * U.sum (U.map square d) - square sumd) / ndf) 147 | -- degree of freedom 148 | ndf = n - 1 149 | n = fromIntegral $ G.length sample 150 | -------------------------------------------------------------------------------- /Statistics/Test/Types.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveFunctor, DeriveDataTypeable,DeriveGeneric #-} 2 | module Statistics.Test.Types ( 3 | Test(..) 4 | , isSignificant 5 | , TestResult(..) 6 | , significant 7 | , PositionTest(..) 8 | ) where 9 | 10 | import Control.DeepSeq (NFData(..)) 11 | import Control.Monad (liftM3) 12 | import Data.Aeson (FromJSON, ToJSON) 13 | import Data.Binary (Binary (..)) 14 | import Data.Data (Typeable, Data) 15 | import GHC.Generics 16 | 17 | import Statistics.Types (PValue) 18 | 19 | 20 | -- | Result of hypothesis testing 21 | data TestResult = Significant -- ^ Null hypothesis should be rejected 22 | | NotSignificant -- ^ Data is compatible with hypothesis 23 | deriving (Eq,Ord,Show,Typeable,Data,Generic) 24 | 25 | instance Binary TestResult where 26 | get = do 27 | sig <- get 28 | if sig then return Significant else return NotSignificant 29 | put = put . (== Significant) 30 | instance FromJSON TestResult 31 | instance ToJSON TestResult 32 | instance NFData TestResult 33 | 34 | 35 | 36 | -- | Result of statistical test. 
37 | data Test distr = Test 38 | { testSignificance :: !(PValue Double) 39 | -- ^ Probability of getting value of test statistics at least as 40 | -- extreme as measured. 41 | , testStatistics :: !Double 42 | -- ^ Statistic used for test. 43 | , testDistribution :: distr 44 | -- ^ Distribution of test statistics if null hypothesis is correct. 45 | } 46 | deriving (Eq,Ord,Show,Typeable,Data,Generic,Functor) 47 | 48 | instance (Binary d) => Binary (Test d) where 49 | get = liftM3 Test get get get 50 | put (Test sign stat distr) = put sign >> put stat >> put distr 51 | instance (FromJSON d) => FromJSON (Test d) 52 | instance (ToJSON d) => ToJSON (Test d) 53 | instance (NFData d) => NFData (Test d) where 54 | rnf (Test _ _ a) = rnf a 55 | 56 | -- | Check whether test is significant for given p-value. 57 | isSignificant :: PValue Double -> Test d -> TestResult 58 | isSignificant p t 59 | = significant $ p >= testSignificance t 60 | 61 | 62 | -- | Test type for test which compare positional (mean,median etc.) 63 | -- information of samples. 64 | data PositionTest 65 | = SamplesDiffer 66 | -- ^ Test whether samples differ in position. Null hypothesis is 67 | -- samples are not different 68 | | AGreater 69 | -- ^ Test if first sample (A) is larger than second (B). Null 70 | -- hypothesis is first sample is not larger than second. 71 | | BGreater 72 | -- ^ Test if second sample is larger than first. 
73 | deriving (Eq,Ord,Show,Typeable,Data,Generic) 74 | 75 | instance Binary PositionTest where 76 | get = do 77 | i <- get 78 | case (i :: Int) of 79 | 0 -> return SamplesDiffer 80 | 1 -> return AGreater 81 | 2 -> return BGreater 82 | _ -> fail "Invalid PositionTest" 83 | put SamplesDiffer = put (0 :: Int) 84 | put AGreater = put (1 :: Int) 85 | put BGreater = put (2 :: Int) 86 | instance FromJSON PositionTest 87 | instance ToJSON PositionTest 88 | instance NFData PositionTest 89 | 90 | -- | significant if parameter is 'True', not significant otherwise 91 | significant :: Bool -> TestResult 92 | significant True = Significant 93 | significant False = NotSignificant 94 | -------------------------------------------------------------------------------- /Statistics/Types/Internal.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Statistics.Types.Internal 3 | -- Copyright : (c) 2009 Bryan O'Sullivan 4 | -- License : BSD3 5 | -- 6 | -- Maintainer : bos@serpentine.com 7 | -- Stability : experimental 8 | -- Portability : portable 9 | -- 10 | -- Types for working with statistics. 11 | module Statistics.Types.Internal where 12 | 13 | 14 | import qualified Data.Vector.Unboxed as U (Vector) 15 | 16 | -- | Sample data. 17 | type Sample = U.Vector Double 18 | 19 | -- | Sample with weights. First element of sample is data, second is weight 20 | type WeightedSample = U.Vector (Double,Double) 21 | 22 | -- | Weights for affecting the importance of elements of a sample. 
23 | type Weights = U.Vector Double 24 | 25 | -------------------------------------------------------------------------------- /bench-papi/Bench.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Here we reexport definitions of tasty-bench 3 | module Bench 4 | ( whnf 5 | , nf 6 | , nfIO 7 | , whnfIO 8 | , bench 9 | , bgroup 10 | , defaultMain 11 | , benchIngredients 12 | ) where 13 | 14 | import Test.Tasty.PAPI 15 | -------------------------------------------------------------------------------- /bench-time/Bench.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Here we reexport definitions of tasty-bench 3 | module Bench 4 | ( whnf 5 | , nf 6 | , nfIO 7 | , whnfIO 8 | , bench 9 | , bgroup 10 | , defaultMain 11 | , benchIngredients 12 | ) where 13 | 14 | import Test.Tasty.Bench 15 | -------------------------------------------------------------------------------- /benchmark/bench.hs: -------------------------------------------------------------------------------- 1 | import Data.Complex 2 | import Statistics.Sample 3 | import Statistics.Transform 4 | import Statistics.Correlation 5 | import System.Random.MWC 6 | import qualified Data.Vector.Unboxed as VU 7 | import qualified Data.Vector.Unboxed.Mutable as MVU 8 | 9 | import Bench 10 | 11 | 12 | -- Test sample 13 | sample :: VU.Vector Double 14 | sample = VU.create $ do g <- create 15 | MVU.replicateM 10000 (uniform g) 16 | 17 | -- Weighted test sample 18 | sampleW :: VU.Vector (Double,Double) 19 | sampleW = VU.zip sample (VU.reverse sample) 20 | 21 | -- Complex vector for FFT tests 22 | sampleC :: VU.Vector (Complex Double) 23 | sampleC = VU.zipWith (:+) sample (VU.reverse sample) 24 | 25 | 26 | -- Simple benchmark for functions from Statistics.Sample 27 | main :: IO () 28 | main = 29 | defaultMain 30 | [ bgroup "sample" 31 | [ bench "range" $ nf (\x -> range x) sample 32 | -- Mean 33 | , bench "mean" $ nf (\x -> 
mean x) sample 34 | , bench "meanWeighted" $ nf (\x -> meanWeighted x) sampleW 35 | , bench "harmonicMean" $ nf (\x -> harmonicMean x) sample 36 | , bench "geometricMean" $ nf (\x -> geometricMean x) sample 37 | -- Variance 38 | , bench "variance" $ nf (\x -> variance x) sample 39 | , bench "varianceUnbiased" $ nf (\x -> varianceUnbiased x) sample 40 | , bench "varianceWeighted" $ nf (\x -> varianceWeighted x) sampleW 41 | -- Correlation 42 | , bench "pearson" $ nf pearson sampleW 43 | , bench "covariance" $ nf covariance sampleW 44 | , bench "correlation" $ nf correlation sampleW 45 | , bench "covariance2" $ nf (covariance2 sample) sample 46 | , bench "correlation2" $ nf (correlation2 sample) sample 47 | -- Other 48 | , bench "stdDev" $ nf (\x -> stdDev x) sample 49 | , bench "skewness" $ nf (\x -> skewness x) sample 50 | , bench "kurtosis" $ nf (\x -> kurtosis x) sample 51 | -- Central moments 52 | , bench "C.M. 2" $ nf (\x -> centralMoment 2 x) sample 53 | , bench "C.M. 3" $ nf (\x -> centralMoment 3 x) sample 54 | , bench "C.M. 4" $ nf (\x -> centralMoment 4 x) sample 55 | , bench "C.M. 
5" $ nf (\x -> centralMoment 5 x) sample 56 | ] 57 | , bgroup "FFT" 58 | [ bgroup "fft" 59 | [ bench (show n) $ whnf fft (VU.take n sampleC) | n <- fftSizes ] 60 | , bgroup "ifft" 61 | [ bench (show n) $ whnf ifft (VU.take n sampleC) | n <- fftSizes ] 62 | , bgroup "dct" 63 | [ bench (show n) $ whnf dct (VU.take n sample) | n <- fftSizes ] 64 | , bgroup "dct_" 65 | [ bench (show n) $ whnf dct_ (VU.take n sampleC) | n <- fftSizes ] 66 | , bgroup "idct" 67 | [ bench (show n) $ whnf idct (VU.take n sample) | n <- fftSizes ] 68 | , bgroup "idct_" 69 | [ bench (show n) $ whnf idct_ (VU.take n sampleC) | n <- fftSizes ] 70 | ] 71 | ] 72 | 73 | 74 | fftSizes :: [Int] 75 | fftSizes = [32,128,512,2048] 76 | -------------------------------------------------------------------------------- /cabal.project: -------------------------------------------------------------------------------- 1 | packages: 2 | statistics.cabal 3 | 4 | -------------------------------------------------------------------------------- /dense-linear-algebra/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2009, 2010 Bryan O'Sullivan 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions 6 | are met: 7 | 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above 12 | copyright notice, this list of conditions and the following 13 | disclaimer in the documentation and/or other materials provided 14 | with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 20 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /dense-linear-algebra/README.md: -------------------------------------------------------------------------------- 1 | # statistics-dense-linear-algebra 2 | 3 | [![Build Status](https://travis-ci.org/githubuser/statistics-dense-linear-algebra.png)](https://travis-ci.org/githubuser/statistics-dense-linear-algebra) 4 | 5 | The dense linear algebra functionality and related modules, extracted from `statistics-0.14.0.2` 6 | -------------------------------------------------------------------------------- /dense-linear-algebra/dense-linear-algebra.cabal: -------------------------------------------------------------------------------- 1 | name: dense-linear-algebra 2 | version: 0.1.0.0 3 | synopsis: Simple and incomplete pure haskell implementation of linear algebra 4 | description: 5 | This library is simply collection of linear-algebra related modules 6 | split from statistics library. 
7 | 8 | license: BSD2 9 | license-file: LICENSE 10 | author: Bryan O'Sullivan 11 | maintainer: Alexey Khudaykov 12 | copyright: 2018 Author name here 13 | category: Math, Statistics, Numeric 14 | build-type: Simple 15 | extra-source-files: README.md 16 | cabal-version: >=1.10 17 | 18 | tested-with: 19 | GHC ==7.4.2 20 | || ==7.6.3 21 | || ==7.8.4 22 | || ==7.10.3 23 | || ==8.0.2 24 | || ==8.2.2 25 | || ==8.4.4 26 | || ==8.6.5 27 | || ==8.8.1 28 | , GHCJS ==8.4 29 | 30 | library 31 | default-language: Haskell2010 32 | ghc-options: -Wall 33 | hs-source-dirs: src 34 | exposed-modules: Statistics.Matrix 35 | Statistics.Matrix.Algorithms 36 | Statistics.Matrix.Function 37 | Statistics.Matrix.Mutable 38 | Statistics.Matrix.Types 39 | build-depends: base >= 4.5 && < 5 40 | , deepseq >= 1.1.0.2 41 | , math-functions >= 0.1.7 42 | , primitive >= 0.3 43 | , vector >= 0.10 44 | , vector-algorithms >= 0.4 45 | , vector-th-unbox 46 | , vector-binary-instances >= 0.2.1 47 | 48 | test-suite spec 49 | default-language: Haskell2010 50 | ghc-options: -Wall 51 | type: exitcode-stdio-1.0 52 | hs-source-dirs: test 53 | main-is: LibSpec.hs 54 | build-depends: base 55 | , dense-linear-algebra 56 | , hspec 57 | , QuickCheck 58 | 59 | source-repository head 60 | type: git 61 | location: https://github.com/haskell/statistics 62 | -------------------------------------------------------------------------------- /dense-linear-algebra/src/Statistics/Matrix/Algorithms.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Statistics.Matrix.Algorithms 3 | -- Copyright : 2014 Bryan O'Sullivan 4 | -- License : BSD3 5 | -- 6 | -- Useful matrix functions. 
7 | 8 | module Statistics.Matrix.Algorithms 9 | ( 10 | qr 11 | ) where 12 | 13 | import Control.Applicative ((<$>), (<*>)) 14 | import Control.Monad.ST (ST, runST) 15 | import Prelude hiding (replicate) 16 | import Numeric.Sum (sumVector,kbn) 17 | import Statistics.Matrix (Matrix, column, dimension, for, norm) 18 | import qualified Statistics.Matrix.Mutable as M 19 | import qualified Data.Vector.Unboxed as U 20 | 21 | -- | /O(r*c)/ Compute the QR decomposition of a matrix. 22 | -- The result returned is the matrices (/q/,/r/). 23 | qr :: Matrix -> (Matrix, Matrix) 24 | qr mat = runST $ do 25 | let (m,n) = dimension mat 26 | r <- M.replicate n n 0 27 | a <- M.thaw mat 28 | for 0 n $ \j -> do 29 | cn <- M.immutably a $ \aa -> norm (column aa j) 30 | M.unsafeWrite r j j cn 31 | for 0 m $ \i -> M.unsafeModify a i j (/ cn) 32 | for (j+1) n $ \jj -> do 33 | p <- innerProduct a j jj 34 | M.unsafeWrite r j jj p 35 | for 0 m $ \i -> do 36 | aij <- M.unsafeRead a i j 37 | M.unsafeModify a i jj $ subtract (p * aij) 38 | (,) <$> M.unsafeFreeze a <*> M.unsafeFreeze r 39 | 40 | innerProduct :: M.MMatrix s -> Int -> Int -> ST s Double 41 | innerProduct mmat j k = M.immutably mmat $ \mat -> 42 | sumVector kbn $ U.zipWith (*) (column mat j) (column mat k) 43 | -------------------------------------------------------------------------------- /dense-linear-algebra/src/Statistics/Matrix/Function.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE BangPatterns #-} 2 | -- | 3 | module Statistics.Matrix.Function where 4 | 5 | -- | Multiply a number by itself. 6 | square :: Double -> Double 7 | square x = x * x 8 | 9 | -- | Simple for loop. Counts from /start/ to /end/-1. 
10 | for :: Monad m => Int -> Int -> (Int -> m ()) -> m () 11 | for n0 !n f = loop n0 12 | where 13 | loop i | i == n = return () 14 | | otherwise = f i >> loop (i+1) 15 | {-# INLINE for #-} 16 | -------------------------------------------------------------------------------- /dense-linear-algebra/src/Statistics/Matrix/Mutable.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Statistics.Matrix.Mutable 3 | -- Copyright : (c) 2014 Bryan O'Sullivan 4 | -- License : BSD3 5 | -- 6 | -- Basic mutable matrix operations. 7 | 8 | module Statistics.Matrix.Mutable 9 | ( 10 | MMatrix(..) 11 | , MVector 12 | , replicate 13 | , thaw 14 | , bounds 15 | , unsafeNew 16 | , unsafeFreeze 17 | , unsafeRead 18 | , unsafeWrite 19 | , unsafeModify 20 | , immutably 21 | , unsafeBounds 22 | ) where 23 | 24 | import Control.Applicative ((<$>)) 25 | import Control.DeepSeq (NFData(..)) 26 | import Control.Monad.ST (ST) 27 | import Statistics.Matrix.Types (Matrix(..), MMatrix(..), MVector) 28 | import qualified Data.Vector.Unboxed as U 29 | import qualified Data.Vector.Unboxed.Mutable as M 30 | import Prelude hiding (replicate) 31 | 32 | replicate :: Int -> Int -> Double -> ST s (MMatrix s) 33 | replicate r c k = MMatrix r c <$> M.replicate (r*c) k 34 | 35 | thaw :: Matrix -> ST s (MMatrix s) 36 | thaw (Matrix r c v) = MMatrix r c <$> U.thaw v 37 | 38 | unsafeFreeze :: MMatrix s -> ST s Matrix 39 | unsafeFreeze (MMatrix r c mv) = Matrix r c <$> U.unsafeFreeze mv 40 | 41 | -- | Allocate new matrix. Matrix content is not initialized hence unsafe. 
42 | unsafeNew :: Int -- ^ Number of rows 43 | -> Int -- ^ Number of columns 44 | -> ST s (MMatrix s) 45 | unsafeNew r c 46 | | r < 0 = error "Statistics.Matrix.Mutable.unsafeNew: negative number of rows" 47 | | c < 0 = error "Statistics.Matrix.Mutable.unsafeNew: negative number of columns" 48 | | otherwise = do 49 | vec <- M.new (r*c) 50 | return $ MMatrix r c vec 51 | 52 | unsafeRead :: MMatrix s -> Int -> Int -> ST s Double 53 | unsafeRead mat r c = unsafeBounds mat r c M.unsafeRead 54 | {-# INLINE unsafeRead #-} 55 | 56 | unsafeWrite :: MMatrix s -> Int -> Int -> Double -> ST s () 57 | unsafeWrite mat row col k = unsafeBounds mat row col $ \v i -> 58 | M.unsafeWrite v i k 59 | {-# INLINE unsafeWrite #-} 60 | 61 | unsafeModify :: MMatrix s -> Int -> Int -> (Double -> Double) -> ST s () 62 | unsafeModify mat row col f = unsafeBounds mat row col $ \v i -> do 63 | k <- M.unsafeRead v i 64 | M.unsafeWrite v i (f k) 65 | {-# INLINE unsafeModify #-} 66 | 67 | -- | Given row and column numbers, calculate the offset into the flat 68 | -- row-major vector, checking that the indices are in range. 69 | bounds :: MMatrix s -> Int -> Int -> (MVector s -> Int -> r) -> r 70 | bounds (MMatrix rs cs mv) r c k 71 | | r < 0 || r >= rs = error "row out of bounds" 72 | | c < 0 || c >= cs = error "column out of bounds" 73 | | otherwise = k mv $! r * cs + c 74 | {-# INLINE bounds #-} 75 | 76 | -- | Given row and column numbers, calculate the offset into the flat 77 | -- row-major vector, without checking. 78 | unsafeBounds :: MMatrix s -> Int -> Int -> (MVector s -> Int -> r) -> r 79 | unsafeBounds (MMatrix _ cs mv) r c k = k mv $!
r * cs + c 80 | {-# INLINE unsafeBounds #-} 81 | 82 | immutably :: NFData a => MMatrix s -> (Matrix -> a) -> ST s a 83 | immutably mmat f = do 84 | k <- f <$> unsafeFreeze mmat 85 | rnf k `seq` return k 86 | {-# INLINE immutably #-} 87 | -------------------------------------------------------------------------------- /dense-linear-algebra/src/Statistics/Matrix/Types.hs: -------------------------------------------------------------------------------- 1 | -- | 2 | -- Module : Statistics.Matrix.Types 3 | -- Copyright : 2014 Bryan O'Sullivan 4 | -- License : BSD3 5 | -- 6 | -- Basic matrix operations. 7 | -- 8 | -- There isn't a widely used matrix package for Haskell yet, so 9 | -- we implement the necessary minimum here. 10 | 11 | module Statistics.Matrix.Types 12 | ( 13 | Vector 14 | , MVector 15 | , Matrix(..) 16 | , MMatrix(..) 17 | , debug 18 | ) where 19 | 20 | import Data.Char (isSpace) 21 | import Numeric (showFFloat) 22 | import qualified Data.Vector.Unboxed as U 23 | import qualified Data.Vector.Unboxed.Mutable as M 24 | 25 | type Vector = U.Vector Double 26 | type MVector s = M.MVector s Double 27 | 28 | -- | Two-dimensional matrix, stored in row-major order. 29 | data Matrix = Matrix { 30 | rows :: {-# UNPACK #-} !Int -- ^ Rows of matrix. 31 | , cols :: {-# UNPACK #-} !Int -- ^ Columns of matrix. 32 | , _vector :: !Vector -- ^ Matrix data. 33 | } deriving (Eq) 34 | 35 | -- | Two-dimensional mutable matrix, stored in row-major order. 36 | data MMatrix s = MMatrix 37 | {-# UNPACK #-} !Int 38 | {-# UNPACK #-} !Int 39 | !(MVector s) 40 | 41 | -- The Show instance is useful only for debugging. 42 | instance Show Matrix where 43 | show = debug 44 | 45 | debug :: Matrix -> String 46 | debug (Matrix r c vs) = unlines $ zipWith (++) (hdr0 : repeat hdr) rrows 47 | where 48 | rrows = map (cleanEnd . unwords) . 
split $ zipWith (++) ldone tdone 49 | hdr0 = show (r,c) ++ " " 50 | hdr = replicate (length hdr0) ' ' 51 | pad plus k xs = replicate (k - length xs) ' ' `plus` xs 52 | ldone = map (pad (++) (longest lstr)) lstr 53 | tdone = map (pad (flip (++)) (longest tstr)) tstr 54 | (lstr, tstr) = unzip . map (break (=='.') . render) . U.toList $ vs 55 | longest = maximum . map length 56 | render k = reverse . dropWhile (=='.') . dropWhile (=='0') . reverse . 57 | showFFloat (Just 4) k $ "" 58 | split [] = [] 59 | split xs = i : split rest where (i, rest) = splitAt c xs 60 | cleanEnd = reverse . dropWhile isSpace . reverse 61 | -------------------------------------------------------------------------------- /dense-linear-algebra/test/LibSpec.hs: -------------------------------------------------------------------------------- 1 | module Main where 2 | 3 | import Test.Hspec 4 | import Test.Hspec.QuickCheck 5 | 6 | 7 | main :: IO () 8 | main = hspec spec 9 | 10 | spec :: Spec 11 | spec = 12 | describe "Lib" $ do 13 | it "works" $ do 14 | True `shouldBe` True 15 | -- prop "ourAdd is commutative" $ \x y -> 16 | -- ourAdd x y `shouldBe` ourAdd y x 17 | -------------------------------------------------------------------------------- /dense-linear-algebra/test/Spec.hs: -------------------------------------------------------------------------------- 1 | {-# OPTIONS_GHC -F -pgmF hspec-discover #-} 2 | -------------------------------------------------------------------------------- /examples/kde/KDE.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE OverloadedStrings #-} 2 | 3 | import Control.Applicative ((<$>)) 4 | import Statistics.Sample.KernelDensity (kde) 5 | import Text.Hastache (MuType(..), defaultConfig, hastacheFile) 6 | import Text.Hastache.Context (mkStrContext) 7 | import qualified Data.Attoparsec.ByteString as B 8 | import qualified Data.Attoparsec.ByteString.Char8 as A 9 | import qualified Data.ByteString as B 10 | import 
qualified Data.ByteString.Lazy as L 11 | import qualified Data.Vector.Unboxed as U 12 | import qualified Data.Text.Lazy.IO as TL 13 | 14 | csv = do 15 | B.takeTill A.isEndOfLine 16 | (A.double `A.sepBy` A.char ',') `A.sepBy` A.endOfLine 17 | 18 | main = do 19 | waits <- (either error (U.fromList . map last . filter (not.null)) . 20 | A.parseOnly csv) <$> B.readFile "data/faithful.csv" 21 | let xs = map (\(a,b) -> [a,b]) . U.toList . uncurry U.zip . kde 64 $ waits 22 | context "data" = MuVariable . show $ xs 23 | s <- hastacheFile defaultConfig "kde.tpl" (mkStrContext context) 24 | TL.writeFile "kde.html" s 25 | -------------------------------------------------------------------------------- /examples/kde/data/faithful.csv: -------------------------------------------------------------------------------- 1 | eruption,wait 2 | 3.6,79 3 | 1.8,54 4 | 3.333,74 5 | 2.283,62 6 | 4.533,85 7 | 2.883,55 8 | 4.7,88 9 | 3.6,85 10 | 1.95,51 11 | 4.35,85 12 | 1.833,54 13 | 3.917,84 14 | 4.2,78 15 | 1.75,47 16 | 4.7,83 17 | 2.167,52 18 | 1.75,62 19 | 4.8,84 20 | 1.6,52 21 | 4.25,79 22 | 1.8,51 23 | 1.75,47 24 | 3.45,78 25 | 3.067,69 26 | 4.533,74 27 | 3.6,83 28 | 1.967,55 29 | 4.083,76 30 | 3.85,78 31 | 4.433,79 32 | 4.3,73 33 | 4.467,77 34 | 3.367,66 35 | 4.033,80 36 | 3.833,74 37 | 2.017,52 38 | 1.867,48 39 | 4.833,80 40 | 1.833,59 41 | 4.783,90 42 | 4.35,80 43 | 1.883,58 44 | 4.567,84 45 | 1.75,58 46 | 4.533,73 47 | 3.317,83 48 | 3.833,64 49 | 2.1,53 50 | 4.633,82 51 | 2,59 52 | 4.8,75 53 | 4.716,90 54 | 1.833,54 55 | 4.833,80 56 | 1.733,54 57 | 4.883,83 58 | 3.717,71 59 | 1.667,64 60 | 4.567,77 61 | 4.317,81 62 | 2.233,59 63 | 4.5,84 64 | 1.75,48 65 | 4.8,82 66 | 1.817,60 67 | 4.4,92 68 | 4.167,78 69 | 4.7,78 70 | 2.067,65 71 | 4.7,73 72 | 4.033,82 73 | 1.967,56 74 | 4.5,79 75 | 4,71 76 | 1.983,62 77 | 5.067,76 78 | 2.017,60 79 | 4.567,78 80 | 3.883,76 81 | 3.6,83 82 | 4.133,75 83 | 4.333,82 84 | 4.1,70 85 | 2.633,65 86 | 4.067,73 87 | 4.933,88 88 | 3.95,76 89 | 4.517,80 90 | 
2.167,48 91 | 4,86 92 | 2.2,60 93 | 4.333,90 94 | 1.867,50 95 | 4.817,78 96 | 1.833,63 97 | 4.3,72 98 | 4.667,84 99 | 3.75,75 100 | 1.867,51 101 | 4.9,82 102 | 2.483,62 103 | 4.367,88 104 | 2.1,49 105 | 4.5,83 106 | 4.05,81 107 | 1.867,47 108 | 4.7,84 109 | 1.783,52 110 | 4.85,86 111 | 3.683,81 112 | 4.733,75 113 | 2.3,59 114 | 4.9,89 115 | 4.417,79 116 | 1.7,59 117 | 4.633,81 118 | 2.317,50 119 | 4.6,85 120 | 1.817,59 121 | 4.417,87 122 | 2.617,53 123 | 4.067,69 124 | 4.25,77 125 | 1.967,56 126 | 4.6,88 127 | 3.767,81 128 | 1.917,45 129 | 4.5,82 130 | 2.267,55 131 | 4.65,90 132 | 1.867,45 133 | 4.167,83 134 | 2.8,56 135 | 4.333,89 136 | 1.833,46 137 | 4.383,82 138 | 1.883,51 139 | 4.933,86 140 | 2.033,53 141 | 3.733,79 142 | 4.233,81 143 | 2.233,60 144 | 4.533,82 145 | 4.817,77 146 | 4.333,76 147 | 1.983,59 148 | 4.633,80 149 | 2.017,49 150 | 5.1,96 151 | 1.8,53 152 | 5.033,77 153 | 4,77 154 | 2.4,65 155 | 4.6,81 156 | 3.567,71 157 | 4,70 158 | 4.5,81 159 | 4.083,93 160 | 1.8,53 161 | 3.967,89 162 | 2.2,45 163 | 4.15,86 164 | 2,58 165 | 3.833,78 166 | 3.5,66 167 | 4.583,76 168 | 2.367,63 169 | 5,88 170 | 1.933,52 171 | 4.617,93 172 | 1.917,49 173 | 2.083,57 174 | 4.583,77 175 | 3.333,68 176 | 4.167,81 177 | 4.333,81 178 | 4.5,73 179 | 2.417,50 180 | 4,85 181 | 4.167,74 182 | 1.883,55 183 | 4.583,77 184 | 4.25,83 185 | 3.767,83 186 | 2.033,51 187 | 4.433,78 188 | 4.083,84 189 | 1.833,46 190 | 4.417,83 191 | 2.183,55 192 | 4.8,81 193 | 1.833,57 194 | 4.8,76 195 | 4.1,84 196 | 3.966,77 197 | 4.233,81 198 | 3.5,87 199 | 4.366,77 200 | 2.25,51 201 | 4.667,78 202 | 2.1,60 203 | 4.35,82 204 | 4.133,91 205 | 1.867,53 206 | 4.6,78 207 | 1.783,46 208 | 4.367,77 209 | 3.85,84 210 | 1.933,49 211 | 4.5,83 212 | 2.383,71 213 | 4.7,80 214 | 1.867,49 215 | 3.833,75 216 | 3.417,64 217 | 4.233,76 218 | 2.4,53 219 | 4.8,94 220 | 2,55 221 | 4.15,76 222 | 1.867,50 223 | 4.267,82 224 | 1.75,54 225 | 4.483,75 226 | 4,78 227 | 4.117,79 228 | 4.083,78 229 | 4.267,78 230 | 3.917,70 231 | 
4.55,79 232 | 4.083,70 233 | 2.417,54 234 | 4.183,86 235 | 2.217,50 236 | 4.45,90 237 | 1.883,54 238 | 1.85,54 239 | 4.283,77 240 | 3.95,79 241 | 2.333,64 242 | 4.15,75 243 | 2.35,47 244 | 4.933,86 245 | 2.9,63 246 | 4.583,85 247 | 3.833,82 248 | 2.083,57 249 | 4.367,82 250 | 2.133,67 251 | 4.35,74 252 | 2.2,54 253 | 4.45,83 254 | 3.567,73 255 | 4.5,73 256 | 4.15,88 257 | 3.817,80 258 | 3.917,71 259 | 4.45,83 260 | 2,56 261 | 4.283,79 262 | 4.767,78 263 | 4.533,84 264 | 1.85,58 265 | 4.25,83 266 | 1.983,43 267 | 2.25,60 268 | 4.75,75 269 | 4.117,81 270 | 2.15,46 271 | 4.417,90 272 | 1.817,46 273 | 4.467,74 274 | -------------------------------------------------------------------------------- /examples/kde/kde.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Kernel density 6 | 7 | 8 | 9 | 10 | 11 |

Kernel density

12 | 13 |
14 | 15 |

This is a 64-point kernel density estimate 16 | of wait 17 | times between eruptions of 18 | the Old 19 | Faithful geyser.

20 | 21 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /examples/kde/kde.tpl: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Kernel density 6 | 7 | 8 | 9 | 10 | 11 |

Kernel density

12 | 13 |
14 | 15 |

This is a 64-point kernel density estimate 16 | of wait 17 | times between eruptions of 18 | the Old 19 | Faithful geyser.

20 | 21 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /tests/Tests/ApproxEq.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE FlexibleContexts, FlexibleInstances, TypeFamilies #-} 2 | 3 | module Tests.ApproxEq 4 | ( 5 | ApproxEq(..) 6 | ) where 7 | 8 | import Data.Complex (Complex(..), realPart) 9 | import Data.List (intersperse) 10 | import Data.Maybe (catMaybes) 11 | import Numeric.MathFunctions.Constants (m_epsilon) 12 | import Statistics.Matrix hiding (map, toList) 13 | import Test.QuickCheck 14 | import qualified Data.Vector as V 15 | import qualified Data.Vector.Generic as G 16 | import qualified Data.Vector.Unboxed as U 17 | import qualified Statistics.Matrix as M 18 | 19 | class (Eq a, Show a) => ApproxEq a where 20 | type Bounds a 21 | 22 | eq :: Bounds a -> a -> a -> Bool 23 | eql :: Bounds a -> a -> a -> Property 24 | eql eps a b = counterexample (show a ++ " /=~ " ++ show b) (eq eps a b) 25 | 26 | (=~) :: a -> a -> Bool 27 | 28 | (==~) :: a -> a -> Property 29 | a ==~ b = counterexample (show a ++ " /=~ " ++ show b) (a =~ b) 30 | 31 | instance ApproxEq Double where 32 | type Bounds Double = Double 33 | 34 | eq eps a b 35 | | a == 0 && b == 0 = True 36 | | otherwise = abs (a - b) <= eps * max (abs a) (abs b) 37 | (=~) = eq m_epsilon 38 | 39 | instance ApproxEq (Complex Double) where 40 | type Bounds (Complex Double) = Double 41 | 42 | eq eps a@(ar :+ ai) b@(br :+ bi) 43 | | a == 0 && b == 0 = True 44 | | otherwise = abs (ar - br) <= eps * d 45 | && abs (ai - bi) <= eps * d 46 | where 47 | d = max (realPart $ abs a) (realPart $ abs b) 48 | 49 | (=~) = eq m_epsilon 50 | 51 | instance ApproxEq [Double] where 52 | type Bounds [Double] = Double 53 | 54 | eq eps (x:xs) (y:ys) = eq eps x y && eq eps xs ys 55 | eq _ [] [] = True 56 | eq _ _ _ = False 57 | 58 | eql = eqll length id id 59 | (=~) = eq m_epsilon 60 | (==~) = eql m_epsilon 61 | 62 | instance 
ApproxEq (U.Vector Double) where 63 | type Bounds (U.Vector Double) = Double 64 | 65 | eq = eqv 66 | (=~) = eq m_epsilon 67 | eql = eqlv 68 | (==~) = eqlv m_epsilon 69 | 70 | instance ApproxEq (V.Vector Double) where 71 | type Bounds (V.Vector Double) = Double 72 | 73 | eq = eqv 74 | (=~) = eq m_epsilon 75 | eql = eqlv 76 | (==~) = eqlv m_epsilon 77 | 78 | instance ApproxEq Matrix where 79 | type Bounds Matrix = Double 80 | 81 | eq eps (Matrix r1 c1 v1) (Matrix r2 c2 v2) = 82 | (r1,c1) == (r2,c2) && eq eps v1 v2 83 | (=~) = eq m_epsilon 84 | eql eps a b = eqll dimension M.toList (`quotRem` cols a) eps a b 85 | (==~) = eql m_epsilon 86 | 87 | eqv :: (ApproxEq a, G.Vector v Bool, G.Vector v a) => 88 | Bounds a -> v a -> v a -> Bool 89 | eqv eps a b = G.length a == G.length b && G.and (G.zipWith (eq eps) a b) 90 | 91 | eqlv :: (ApproxEq [a], G.Vector v a) => Bounds [a] -> v a -> v a -> Property 92 | eqlv eps a b = eql eps (G.toList a) (G.toList b) 93 | 94 | eqll :: (ApproxEq l, ApproxEq a, Show c, Show d, Eq d, Bounds l ~ Bounds a) => 95 | (l -> d) -> (l -> [a]) -> (Int -> c) -> Bounds l -> l -> l -> Property 96 | eqll dim toList coord eps a b = counterexample fancy $ eq eps a b 97 | where 98 | fancy 99 | | la /= lb = "size mismatch: " ++ show la ++ " /= " ++ show lb 100 | | length summary < length full = summary 101 | | otherwise = full 102 | summary = concat . intersperse ", " . 
catMaybes $ 103 | zipWith3 whee (map coord [(0::Int)..]) xs ys 104 | full | '\n' `elem` sa = sa ++ " /=~\n" ++ sb 105 | | otherwise = sa ++ " /=~" ++ sb 106 | (sa, sb) = (show a, show b) 107 | (xs, ys) = (toList a, toList b) 108 | (la, lb) = (dim a, dim b) 109 | whee i x y | eq eps x y = Nothing 110 | | otherwise = Just $ show i ++ ": " ++ show x ++ " /=~ " ++ show y 111 | -------------------------------------------------------------------------------- /tests/Tests/Correlation.hs: -------------------------------------------------------------------------------- 1 | {-#LANGUAGE BangPatterns #-} 2 | 3 | module Tests.Correlation 4 | ( tests ) where 5 | 6 | import Control.Arrow (Arrow(..)) 7 | import qualified Data.Vector as V 8 | import Data.Maybe 9 | import Statistics.Correlation 10 | import Statistics.Correlation.Kendall 11 | import Test.Tasty 12 | import Test.Tasty.QuickCheck hiding (sample) 13 | import Test.Tasty.HUnit 14 | 15 | import Tests.ApproxEq 16 | 17 | ---------------------------------------------------------------- 18 | -- Tests list 19 | ---------------------------------------------------------------- 20 | 21 | tests :: TestTree 22 | tests = testGroup "Correlation" 23 | [ testProperty "Pearson correlation" testPearson 24 | , testProperty "Spearman correlation is scale invariant" testSpearmanScale 25 | , testProperty "Spearman correlation, nonlinear" testSpearmanNonlinear 26 | , testProperty "Kendall test -- general" testKendall 27 | , testCase "Kendall test -- special cases" testKendallSpecial 28 | ] 29 | 30 | 31 | ---------------------------------------------------------------- 32 | -- Pearson's correlation 33 | ---------------------------------------------------------------- 34 | 35 | testPearson :: [(Double,Double)] -> Property 36 | testPearson sample 37 | = (length sample > 1 && isJust exact) ==> (case exact of 38 | Just e -> e ~= fast 39 | Nothing -> property False 40 | ) 41 | where 42 | (~=) = eql 1e-12 43 | exact = exactPearson $ map (realToFrac 
*** realToFrac) sample 44 | fast = pearson $ V.fromList sample 45 | 46 | exactPearson :: [(Rational,Rational)] -> Maybe Double 47 | exactPearson sample 48 | | varX == 0 || varY == 0 = Nothing 49 | | otherwise = Just $ realToFrac cov / sqrt (realToFrac (varX * varY)) 50 | where 51 | (xs,ys) = unzip sample 52 | n = fromIntegral $ length sample 53 | -- Mean 54 | muX = sum xs / n 55 | muY = sum ys / n 56 | -- Mean of squares 57 | muX2 = sum (map (\x->x*x) xs) / n 58 | muY2 = sum (map (\x->x*x) ys) / n 59 | -- Covariance 60 | cov = sum (zipWith (*) [x - muX | x<-xs] [y - muY | y<-ys]) / n 61 | varX = muX2 - muX*muX 62 | varY = muY2 - muY*muY 63 | 64 | ---------------------------------------------------------------- 65 | -- Spearman's correlation 66 | ---------------------------------------------------------------- 67 | 68 | -- Test that Spearman correlation is scale invariant 69 | testSpearmanScale :: [(Double,Double)] -> Double -> Property 70 | testSpearmanScale xs a 71 | = and [ length xs > 1 -- Enough to calculate underflow 72 | , a /= 0 73 | , not (isNaN c1) 74 | , not (isNaN c2) 75 | , not (isNaN c3) 76 | , not (isNaN c4) 77 | ] 78 | ==> ( counterexample (show xs2) 79 | $ counterexample (show xs3) 80 | $ counterexample (show xs4) 81 | $ counterexample (show (c1,c2,c3,c4)) 82 | $ and [ c1 == c4 83 | , c1 == signum a * c2 84 | , c1 == signum a * c3 85 | ] 86 | ) 87 | where 88 | xs2 = map ((*a) *** id ) xs 89 | xs3 = map (id *** (*a)) xs 90 | xs4 = map ((*a) *** (*a)) xs 91 | c1 = spearman $ V.fromList xs 92 | c2 = spearman $ V.fromList xs2 93 | c3 = spearman $ V.fromList xs3 94 | c4 = spearman $ V.fromList xs4 95 | 96 | -- Test that Spearman correlation allows to transform sample with 97 | testSpearmanNonlinear :: [(Double,Double)] -> Property 98 | testSpearmanNonlinear sample0 99 | = and [ length sample0 > 1 100 | , not (isNaN c1) 101 | , not (isNaN c2) 102 | , not (isNaN c3) 103 | , not (isNaN c4) 104 | ] 105 | ==> ( counterexample ("S0 = " ++ show sample0) 106 | $ 
counterexample ("S1 = " ++ show sample1) 107 | $ counterexample ("S2 = " ++ show sample2) 108 | $ counterexample ("S3 = " ++ show sample3) 109 | $ counterexample ("S4 = " ++ show sample4) 110 | $ counterexample (show (c1,c2,c3,c4)) 111 | $ and [ c1 == c2 112 | , c1 == c3 113 | , c1 == c4 114 | ] 115 | ) 116 | where 117 | -- We need to stretch sample into [-10 .. 10] range to avoid 118 | -- problems with under/overflows etc. 119 | stretch xs 120 | | a == b = xs 121 | | otherwise = [ ((x - a)/(b - a) - 0.5) * 20 | x <- xs ] 122 | where 123 | a = minimum xs 124 | b = maximum xs 125 | sample1 = uncurry zip $ (stretch *** stretch) $ unzip sample0 126 | sample2 = map (exp *** id ) sample1 127 | sample3 = map (id *** exp) sample1 128 | sample4 = map (exp *** exp) sample1 129 | c1 = spearman $ V.fromList sample1 130 | c2 = spearman $ V.fromList sample2 131 | c3 = spearman $ V.fromList sample3 132 | c4 = spearman $ V.fromList sample4 133 | 134 | 135 | ---------------------------------------------------------------- 136 | -- Kendall's correlation 137 | ---------------------------------------------------------------- 138 | 139 | testKendall :: [(Double, Double)] -> Bool 140 | testKendall xy | isNaN r1 = isNaN r2 141 | | otherwise = r1 == r2 142 | where 143 | r1 = kendallBruteForce xy 144 | r2 = kendall $ V.fromList xy 145 | 146 | testKendallSpecial :: Assertion 147 | testKendallSpecial = vs @=? 
map (\(xs, ys) -> kendall $ V.fromList $ zip xs ys) d 148 | where 149 | (d, vs) = unzip testData 150 | testData :: [(([Double], [Double]), Double)] 151 | testData = [ (([1, 2, 3, 1, 2], [1, 2, 1, 5, 2]), -0.375) 152 | , (([1, 1, 1, 3, 3], [3, 3, 3, 2, 5]), 0) 153 | ] 154 | 155 | 156 | kendallBruteForce :: [(Double, Double)] -> Double 157 | kendallBruteForce xy = (n_c - n_d) / sqrt ((n_0 - n_1) * (n_0 - n_2)) 158 | where 159 | allPairs = f xy 160 | (n_c, n_d, n_1, n_2) = foldl g (0,0,0,0) allPairs 161 | n_0 = fromIntegral.length $ allPairs 162 | g (!nc, !nd, !n1, !n2) ((x1, y1), (x2, y2)) 163 | | (x2 - x1) * (y2 - y1) > 0 = (nc+1, nd, n1, n2) 164 | | (x2 - x1) * (y2 - y1) < 0 = (nc, nd+1, n1, n2) 165 | | otherwise = if x1 == x2 166 | then if y1 == y2 167 | then (nc, nd, n1+1, n2+1) 168 | else (nc, nd, n1+1, n2) 169 | else (nc, nd, n1, n2+1) 170 | f (x:xs) = zip (repeat x) xs ++ f xs 171 | f _ = [] 172 | -------------------------------------------------------------------------------- /tests/Tests/Function.hs: -------------------------------------------------------------------------------- 1 | module Tests.Function ( tests ) where 2 | 3 | import Statistics.Function 4 | import Test.Tasty 5 | import Test.Tasty.QuickCheck 6 | import Test.QuickCheck 7 | import Tests.Helpers 8 | import qualified Data.Vector.Unboxed as U 9 | 10 | 11 | tests :: TestTree 12 | tests = testGroup "S.Function" 13 | [ testProperty "Sort is sort" p_sort 14 | , testAssertion "nextHighestPowerOfTwo is OK" p_nextHighestPowerOfTwo 15 | ] 16 | 17 | 18 | p_sort :: [Double] -> Property 19 | p_sort xs = 20 | not (null xs) ==> U.all (uncurry (<=)) (U.zip v $ U.tail v) 21 | where 22 | v = sort $ U.fromList xs 23 | 24 | p_nextHighestPowerOfTwo :: Bool 25 | p_nextHighestPowerOfTwo 26 | = all (\(good, is) -> all ((==good) . nextHighestPowerOfTwo) is) lists 27 | where 28 | pows = [1 .. 17 :: Int] 29 | lists = [ (2^m, [2^n+1 .. 
2^m]) | (n,m) <- pows `zip` tail pows ] 30 | -------------------------------------------------------------------------------- /tests/Tests/Helpers.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE ScopedTypeVariables #-} 2 | -- | Helpers for testing 3 | module Tests.Helpers ( 4 | -- * helpers 5 | T(..) 6 | , typeName 7 | , Double01(..) 8 | -- * IEEE 754 9 | , isDenorm 10 | -- * Generic QC tests 11 | , monotonicallyIncreases 12 | , monotonicallyIncreasesIEEE 13 | -- * HUnit helpers 14 | , testAssertion 15 | , testEquality 16 | -- * QC helpers 17 | , small 18 | , unsquare 19 | , shrinkFixedList 20 | ) where 21 | 22 | import Data.Typeable 23 | import Numeric.MathFunctions.Constants (m_tiny) 24 | import Test.Tasty 25 | import Test.Tasty.HUnit 26 | import Test.QuickCheck 27 | import qualified Numeric.IEEE as IEEE 28 | import qualified Test.Tasty.HUnit as HU 29 | 30 | -- | Phantom typed value used to select right instance in QC tests 31 | data T a = T 32 | 33 | -- | String representation of type name 34 | typeName :: Typeable a => T a -> String 35 | typeName = show . typeOf . 
typeParam 36 | where 37 | typeParam :: T a -> a 38 | typeParam _ = undefined 39 | 40 | -- | Check if Double denormalized 41 | isDenorm :: Double -> Bool 42 | isDenorm x = let ax = abs x in ax > 0 && ax < m_tiny 43 | 44 | -- | Generates Doubles in range [0,1] 45 | newtype Double01 = Double01 Double 46 | deriving (Show) 47 | instance Arbitrary Double01 where 48 | arbitrary = do 49 | (_::Int, x) <- fmap properFraction arbitrary 50 | return $ Double01 x 51 | 52 | ---------------------------------------------------------------- 53 | -- Generic QC 54 | ---------------------------------------------------------------- 55 | 56 | -- Check that function is nondecreasing 57 | monotonicallyIncreases :: (Ord a, Ord b) => (a -> b) -> a -> a -> Bool 58 | monotonicallyIncreases f x1 x2 = f (min x1 x2) <= f (max x1 x2) 59 | 60 | -- Check that function is nondecreasing taking rounding errors into 61 | -- account. 62 | -- 63 | -- In fact function is allowed to decrease less than one ulp in order 64 | -- to guard against problems with excess precision. 
On x86 FPU works 65 | -- with 80-bit numbers but doubles are 64-bit so rounding happens 66 | -- whenever values are moved from registers to memory 67 | monotonicallyIncreasesIEEE :: (Ord a, IEEE.IEEE b) => (a -> b) -> a -> a -> Bool 68 | monotonicallyIncreasesIEEE f x1 x2 = 69 | y1 <= y2 || (y1 - y2) < y2 * IEEE.epsilon 70 | where 71 | y1 = f (min x1 x2) 72 | y2 = f (max x1 x2) 73 | 74 | ---------------------------------------------------------------- 75 | -- HUnit helpers 76 | ---------------------------------------------------------------- 77 | 78 | testAssertion :: String -> Bool -> TestTree 79 | testAssertion str cont = testCase str $ HU.assertBool str cont 80 | 81 | testEquality :: (Show a, Eq a) => String -> a -> a -> TestTree 82 | testEquality msg a b = testCase msg $ HU.assertEqual msg a b 83 | 84 | unsquare :: (Arbitrary a, Show a, Testable b) => (a -> b) -> Property 85 | unsquare = forAll (small arbitrary) 86 | 87 | small :: Gen a -> Gen a 88 | small act = sized $ \n -> resize (smallish n) act 89 | where smallish = round . (sqrt :: Double -> Double) . fromIntegral . abs 90 | 91 | shrinkFixedList :: (a -> [a]) -> [a] -> [[a]] 92 | shrinkFixedList shr (x:xs) = map (:xs) (shr x) ++ map (x:) (shrinkFixedList shr xs) 93 | shrinkFixedList _ [] = [] 94 | -------------------------------------------------------------------------------- /tests/Tests/KDE.hs: -------------------------------------------------------------------------------- 1 | -- | Tests for Kernel density estimates. 
2 | module Tests.KDE ( 3 | tests 4 | )where 5 | 6 | import Data.Vector.Unboxed ((!)) 7 | import Numeric.Sum (kbn, sumVector) 8 | import Statistics.Sample.KernelDensity 9 | import Test.Tasty (TestTree, testGroup) 10 | import Test.Tasty.QuickCheck (testProperty) 11 | import Test.QuickCheck (Property, (==>), counterexample) 12 | import Text.Printf (printf) 13 | import qualified Data.Vector.Unboxed as U 14 | 15 | 16 | tests :: TestTree 17 | tests = testGroup "KDE" 18 | [ testProperty "integral(PDF) == 1" t_densityIsPDF 19 | ] 20 | 21 | t_densityIsPDF :: [Double] -> Property 22 | t_densityIsPDF vec 23 | = not (null vec) ==> test 24 | where 25 | (xs,ys) = kde 4096 (U.fromList vec) 26 | step = (xs ! 1) - (xs ! 0) 27 | integral = integratePDF step ys 28 | -- 29 | test = counterexample (printf "Integral %f" integral) 30 | $ abs (1 - integral) <= 1e-3 31 | 32 | 33 | 34 | integratePDF :: Double -> U.Vector Double -> Double 35 | integratePDF step vec 36 | = step * sumVector kbn (U.zipWith (*) vec weights) 37 | where 38 | n = U.length vec 39 | weights = U.generate n go 40 | where 41 | go i | i == 0 = 0.5 42 | | i == n-1 = 0.5 43 | | otherwise = 1 44 | -------------------------------------------------------------------------------- /tests/Tests/Matrix.hs: -------------------------------------------------------------------------------- 1 | module Tests.Matrix (tests) where 2 | 3 | import Statistics.Matrix hiding (map) 4 | import Statistics.Matrix.Algorithms 5 | import Test.Tasty (TestTree, testGroup) 6 | import Test.Tasty.QuickCheck (testProperty) 7 | import Test.QuickCheck 8 | import Tests.Matrix.Types 9 | import qualified Data.Vector.Unboxed as U 10 | 11 | t_row :: Mat Double -> Gen Property 12 | t_row ms@(Mat r _ xs) = do 13 | i <- choose (0,r-1) 14 | return $ row (fromMat ms) i === U.fromList (xs !! i) 15 | 16 | t_column :: Mat Double -> Gen Property 17 | t_column ms@(Mat _ c xs) = do 18 | i <- choose (0,c-1) 19 | return $ column (fromMat ms) i === U.fromList (map (!! 
i) xs) 20 | 21 | t_center :: Mat Double -> Property 22 | t_center ms@(Mat r c xs) = 23 | (xs !! (r `quot` 2)) !! (c `quot` 2) === center (fromMat ms) 24 | 25 | t_transpose :: Matrix -> Property 26 | t_transpose m = U.concat (map (column n) [0..rows m-1]) === toVector m 27 | where n = transpose m 28 | 29 | t_qr :: Property 30 | t_qr = property $ do 31 | a <- do (r,c) <- arbitrary 32 | fromMat <$> arbMatWith r c (fromIntegral <$> choose (-10, 10::Int)) 33 | let (q,r) = qr a 34 | a' = multiply q r 35 | pure $ counterexample ("A = \n"++show a) 36 | $ counterexample ("A' = \n"++show a') 37 | $ counterexample ("Q = \n"++show q) 38 | $ counterexample ("R = \n"++show r) 39 | $ dimension a == dimension a' 40 | && ( hasNaN a' 41 | || and (zipWith (\x y -> abs (x - y) < 1e-12) (toList a) (toList a')) 42 | ) 43 | 44 | tests :: TestTree 45 | tests = testGroup "Matrix" 46 | [ testProperty "t_row" t_row 47 | , testProperty "t_column" t_column 48 | , testProperty "t_center" t_center 49 | , testProperty "t_transpose" t_transpose 50 | , testProperty "t_qr" t_qr 51 | ] 52 | -------------------------------------------------------------------------------- /tests/Tests/Matrix/Types.hs: -------------------------------------------------------------------------------- 1 | {-# LANGUAGE DeriveFunctor #-} 2 | {-# OPTIONS_GHC -fno-warn-orphans #-} 3 | 4 | module Tests.Matrix.Types 5 | ( 6 | Mat(..) 
7 | , fromMat 8 | , toMat 9 | , arbMat 10 | , arbMatWith 11 | ) where 12 | 13 | import Control.Monad (join) 14 | import Control.Applicative ((<$>), (<*>)) 15 | import Statistics.Matrix (Matrix(..), fromList) 16 | import Test.QuickCheck 17 | import Tests.Helpers (shrinkFixedList, small) 18 | import qualified Data.Vector.Unboxed as U 19 | 20 | data Mat a = Mat { mrows :: Int , mcols :: Int 21 | , asList :: [[a]] } 22 | deriving (Eq, Ord, Show, Functor) 23 | 24 | fromMat :: Mat Double -> Matrix 25 | fromMat (Mat r c xs) = fromList r c (concat xs) 26 | 27 | toMat :: Matrix -> Mat Double 28 | toMat (Matrix r c v) = Mat r c . split . U.toList $ v 29 | where split xs@(_:_) = let (h,t) = splitAt c xs 30 | in h : split t 31 | split [] = [] 32 | 33 | instance (Arbitrary a) => Arbitrary (Mat a) where 34 | arbitrary = small $ join (arbMat <$> arbitrary <*> arbitrary) 35 | shrink (Mat r c xs) = Mat r c <$> shrinkFixedList (shrinkFixedList shrink) xs 36 | 37 | arbMat 38 | :: (Arbitrary a) 39 | => Positive (Small Int) 40 | -> Positive (Small Int) 41 | -> Gen (Mat a) 42 | arbMat r c = arbMatWith r c arbitrary 43 | 44 | arbMatWith 45 | :: (Arbitrary a) 46 | => Positive (Small Int) 47 | -> Positive (Small Int) 48 | -> Gen a 49 | -> Gen (Mat a) 50 | arbMatWith (Positive (Small r)) (Positive (Small c)) genA = 51 | Mat r c <$> vectorOf r (vectorOf c genA) 52 | 53 | instance Arbitrary Matrix where 54 | arbitrary = fromMat <$> arbitrary 55 | -- shrink = map fromMat . shrink . toMat 56 | -------------------------------------------------------------------------------- /tests/Tests/NonParametric/Table.hs: -------------------------------------------------------------------------------- 1 | module Tests.NonParametric.Table ( 2 | tableKSD 3 | , tableKS2D 4 | ) where 5 | 6 | -- Table for Kolmogorov-Smirnov statistics for standard normal 7 | -- distribution. Generated using R. 8 | -- 9 | -- First element of tuple is D second is sample for which it was 10 | -- calculated. 
11 | tableKSD :: [(Double,[Double])] 12 | tableKSD = 13 | [ (0.2012078,[1.360645,-0.3151904,-1.245443,0.1741977,-0.1421206,-1.798246,1.171594,-1.335844,-5.050093e-2,1.030063,-1.849005,0.6491455,-0.7028004]) 14 | , (0.2569956,[0.3884734,-1.227821,-0.4166262,0.429118,-0.9280124,0.8025867,-0.6703089,-0.2124872,0.1224496,0.1087734,-4.285284e-2,-1.039936,-0.7071956]) 15 | , (0.1960356,[-1.814745,-0.6327167,0.7082493,0.6264716,1.02061,-0.4094635,0.821026,-0.4255047,-0.4820728,-0.2239833,0.648517,1.114283,0.3610216]) 16 | , (0.2095746,[0.187011,0.1805498,0.4448389,0.6065506,0.2308673,0.5292549,-1.489902,-1.455191,0.5449396,-0.1436403,-0.7977073,-0.2693545,0.8260888,-1.474473,-2.158696e-2,-0.1455387]) 17 | , (0.1922603,[0.5772317,-1.255561,1.605823,0.4923361,0.2470848,1.176101,-0.3767689,-0.6896885,0.4509345,-0.5048447,0.9436534,1.025599,0.2998393,-3.415219e-2,1.264315,-1.44433,-1.646449e-2]) 18 | , (0.2173401,[1.812807,-0.8687497,-0.5710508,1.003647,1.142621,0.6546577,-6.083323e-3,1.628574e-2,1.067499,-1.953143,-0.6060077,1.90859,-0.7480553,0.6715162,-0.928759,1.862,1.604621,-0.2171044,-0.1835918]) 19 | , (0.2510541,[-0.4769572,1.062319,0.9952284,1.198086,1.015589,-0.4154523,-0.6711762,1.202902,0.2217098,5.381759e-2,0.6679715,0.2551287,-0.1371492]) 20 | , (0.1996022,[1.158607,-0.7354863,1.526559,-0.7855418,-2.82999,-0.6045106,-0.1830228,0.3306812,-0.819657,-1.223715,0.2536423,-0.4155781,1.447042]) 21 | , (0.2284761,[1.239965,0.8187093,0.5199788,1.172072,0.748259,1.869376e-2,0.1625921,-1.712065,0.7043582,-1.702702,-0.4792806,-0.1023351,0.1187189]) 22 | , (0.2337866,[0.9417261,-0.1024297,-0.7354359,1.099991,0.801984,-0.3745397,-1.749564,1.795771,1.099963,-0.605557,-2.035897,1.893603,-0.3468928,-0.2593938,2.100988,0.9665698,0.8757091,0.7696328,0.8730729,-0.3990352,2.04361,-0.4617864,-0.155021,2.15774,0.2687795,-0.9853512,-0.3264898,1.260026,4.267695,-0.5571145,0.6307067,-0.1691405,-1.730686]) 23 | , 
(0.3389167,[2.025542,-1.542641,-1.090238,3.99027,9.949129e-2,-0.8974433,-2.508418,6.390346,-2.675515,1.154459,1.688072,2.220727,-0.4743102]) 24 | , (0.4920231,[0.5192906,-3.260813,-1.245185,1.693084,3.561318,4.058924,2.27063,0.9446943,4.794159,-3.423733,0.8240817,0.644059,0.900175,1.932513,1.024586,2.82823,2.072192,-0.353231,-0.4319673,1.505952,1.0199,4.555054,2.364929,5.531467,3.279415,3.19821,2.726925,1.680027,-0.9041334,-0.8246765,-1.343979,8.454955,1.354581]) 25 | , (0.6727408,[-6.705672,-3.193988,-4.612611,-3.207994,-5.070172,-6.141169,-0.397149,-4.093359,-1.204801,-3.986585,-2.724662,0.9868107,-6.295266,-5.95839,-6.35114,-1.679555,-2.635889,-4.050329,1.557428,-2.548465,-0.9073924,-1.502018,-4.535688,-4.158818,-8.833434,-5.944697,-1.569672,-4.70399,-7.832059,-4.093708,-8.393417,-2.085432,-7.06495,-0.4230419,-3.046822,-3.23895,-0.9265873,-9.227822,3.293713,-5.593577,-5.942398,-4.358421,2.660044,-4.301572,-1.258879,0.1499903,3.572833,-3.19844,0.8652432,-0.3025793,-1.576673,-7.666265,-6.751463,-1.398944,-2.690656,-1.429654,7.508364e-2,0.7998344,-3.562074,-1.021431,1.342968,2.110244,-7.561497,-2.372083,-3.649193,-5.7723,-1.068083,0.7537809,-4.569546,-1.198005,-5.638384,-1.227226,-1.195852,-1.118175,-9.130527,0.9675821,-2.497391,0.5988562,-1.965783,-4.25741,-6.547006,-1.459294,-2.380556,-3.977307,-7.809006,-4.276819,-4.028746,-9.055546,-3.599239,-1.470512,-8.253329,-1.351687,-4.269324,-6.140353,-6.30808,-1.834091,-3.135146,-9.391791,3.117815,-5.554733,-2.556769,-3.287376,-2.064013,-5.741995,-5.047918,-4.808841,-1.488526,-0.2351115,-5.760833,-2.722929,-7.012353,2.281171,-3.890514,-1.516824,-1.41011,-1.828457,-5.561244,-3.472142,-10.16919,-0.4369042,-5.698953,-4.587462,-4.897086]) 26 | ] 27 | 28 | -- Table for 2-sample Kolmogorov-Smirnov statistics. 
Generated using R 29 | -- 30 | -- First element is D, second and third are samples 31 | tableKS2D :: [(Double,[Double],[Double])] 32 | tableKS2D = 33 | [ (0.2820513,[-0.4212928,2.146532,0.7585263,-0.5086105,-0.7725486,6.235548e-2,-0.1849861,0.861972,-0.1958534,-3.379697e-2,-1.316854,0.6701269],[0.4957582,0.4241167,0.9822869,0.4504248,-0.1749617,1.178098,-1.117222,-0.859273,0.3073736,0.4344583,-0.4761338,-1.332374,1.487291]) 34 | , (0.2820513,[-0.712252,0.7990333,-0.7968473,1.443609,1.163096,-1.349071,-0.1553941,-2.003104,-0.3400618,-0.7019282,0.183293,-0.2352167],[-0.4622455,-0.8132221,0.1161614,-1.472115e-2,1.001454,-6.557789e-2,-0.2531216,-1.032432,0.4105478,1.749614,0.9722899,5.850942e-2,-0.3352746]) 35 | , (0.2564103,[0.3509882,-0.2982833,1.314731,1.264223,-0.8156374,0.3734029,-3.288915e-2,0.6766016,0.9786335,0.1079949,-0.4211722,1.58656],[0.8024675,7.464538e-2,0.2739861,-2.334255e-2,0.5611802,0.6683374,0.4358206,0.349843,1.207834,1.402578,-0.4049183,0.4286042,1.665129]) 36 | , (0.1833333,[1.376196,9.926384e-2,2.199292,-2.04993,0.5585353,-0.4812132,0.1041527,2.084774,0.71194,-1.398245,-4.458574e-2,1.484945,-1.473182,1.020076,-0.7019646,0.2182066,-1.702963,-0.3522622,-0.8129267,-0.6338972],[-1.020371,0.3323861,1.513288,0.1958708,-1.0723,5.323446e-2,-0.9993713,-0.7046356,-0.6781067,-0.4471603,1.512042,-0.2650665,-4.765228e-2,-1.501205,1.228664,0.5332935,-0.2960315,-0.1509683]) 37 | , (0.5666667,[0.7145305,0.1255674,2.001531,0.1419216],[2.113474,-0.3352839,-0.4962429,-1.386079,0.6404667,-0.7145304,0.1084008,-0.9821421,-2.270472,-1.003846,-0.5644588,2.699695,-1.296494,-0.1538839,1.319094,-1.127544,0.3568889,0.2004726,-1.313291,0.3581084,0.3313498,0.9336278,0.9850203,-1.309506,1.170459,-0.7517466,-1.771269,0.7156381,-1.129691,0.877729]) 38 | , (0.5,[0.6950626,0.1643805,-0.3102472,0.4810762,0.1844602,1.338836,-0.8083386,-0.5482141,0.9532421,-0.2644837],[7.527945,-1.95654,1.513725,-1.318431,2.453895,0.2078194,0.7371092,2.834245,-2.134794,3.938259]) 39 | ] 40 | 
--------------------------------------------------------------------------------
/tests/Tests/Orphanage.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-- |
-- Orphan instances for common data types
module Tests.Orphanage where

import Control.Applicative
import Statistics.Distribution.Beta (BetaDistribution, betaDistr)
import Statistics.Distribution.Binomial (BinomialDistribution, binomial)
import Statistics.Distribution.CauchyLorentz
import Statistics.Distribution.ChiSquared (ChiSquared, chiSquared)
import Statistics.Distribution.Exponential (ExponentialDistribution, exponential)
import Statistics.Distribution.FDistribution (FDistribution, fDistribution)
import Statistics.Distribution.Gamma (GammaDistribution, gammaDistr)
import Statistics.Distribution.Geometric
import Statistics.Distribution.Hypergeometric
import Statistics.Distribution.Laplace (LaplaceDistribution, laplace)
import Statistics.Distribution.Lognormal (LognormalDistribution, lognormalDistr)
import Statistics.Distribution.NegativeBinomial (NegativeBinomialDistribution, negativeBinomial)
import Statistics.Distribution.Normal (NormalDistribution, normalDistr)
import Statistics.Distribution.Poisson (PoissonDistribution, poisson)
import Statistics.Distribution.StudentT
import Statistics.Distribution.Transform (LinearTransform, scaleAround)
import Statistics.Distribution.Uniform (UniformDistribution, uniformDistr)
import Statistics.Distribution.Weibull (WeibullDistribution, weibullDistr)
import Statistics.Distribution.DiscreteUniform (DiscreteUniform, discreteUniformAB)
import Statistics.Types

import Test.QuickCheck as QC


----------------------------------------------------------------
-- Arbitrary instances for distributions
--
-- Each generator draws distribution parameters from ranges where the
-- smart constructors below are well defined.
----------------------------------------------------------------

instance QC.Arbitrary BinomialDistribution where
  arbitrary = do
    nTrials <- QC.choose (1,100)
    pSucc   <- QC.choose (0,1)
    return $ binomial nTrials pSucc
instance QC.Arbitrary ExponentialDistribution where
  arbitrary = fmap exponential $ QC.choose (0,100)
instance QC.Arbitrary LaplaceDistribution where
  arbitrary = do
    loc   <- QC.choose (-10,10)
    scale <- QC.choose (0, 2)
    return $ laplace loc scale
instance QC.Arbitrary GammaDistribution where
  arbitrary = do
    shape <- QC.choose (0.1,100)
    scale <- QC.choose (0.1,100)
    return $ gammaDistr shape scale
instance QC.Arbitrary BetaDistribution where
  arbitrary = do
    alpha <- QC.choose (1e-3,10)
    beta  <- QC.choose (1e-3,10)
    return $ betaDistr alpha beta
instance QC.Arbitrary GeometricDistribution where
  arbitrary = fmap geometric $ QC.choose (1e-10,1)
instance QC.Arbitrary GeometricDistribution0 where
  arbitrary = fmap geometric0 $ QC.choose (1e-10,1)
instance QC.Arbitrary HypergeometricDistribution where
  arbitrary = do
    popSize <- QC.choose (1,20)
    nSucc   <- QC.choose (0,popSize)
    nDraw   <- QC.choose (1,popSize)
    return $ hypergeometric nSucc popSize nDraw
instance QC.Arbitrary LognormalDistribution where
  -- can't choose sigma too big, otherwise goes outside of double-float limit
  arbitrary = do
    mu    <- QC.choose (-100,100)
    sigma <- QC.choose (1e-10, 20)
    return $ lognormalDistr mu sigma
instance QC.Arbitrary NegativeBinomialDistribution where
  arbitrary = do
    nSucc <- QC.choose (1,100)
    pSucc <- QC.choose (1e-10,1)
    return $ negativeBinomial nSucc pSucc
instance QC.Arbitrary NormalDistribution where
  arbitrary = do
    mu    <- QC.choose (-100,100)
    sigma <- QC.choose (1e-3, 1e3)
    return $ normalDistr mu sigma
instance QC.Arbitrary PoissonDistribution where
  arbitrary = fmap poisson $ QC.choose (0,1)
instance QC.Arbitrary ChiSquared where
  arbitrary = fmap chiSquared $ QC.choose (1,100)
instance QC.Arbitrary UniformDistribution where
  arbitrary = do
    lo <- QC.arbitrary
    hi <- QC.arbitrary `suchThat` (/= lo)
    return $ uniformDistr lo hi
instance QC.Arbitrary WeibullDistribution where
  arbitrary = do
    shape <- QC.choose (1e-3,1e3)
    scale <- QC.choose (1e-3, 1e3)
    return $ weibullDistr shape scale
instance QC.Arbitrary CauchyDistribution where
  arbitrary = do
    center <- arbitrary
    scale  <- (abs <$> arbitrary) `suchThat` (> 0)
    return $ cauchyDistribution center scale
instance QC.Arbitrary StudentT where
  arbitrary = fmap studentT $ (abs <$> arbitrary) `suchThat` (>0)
instance QC.Arbitrary d => QC.Arbitrary (LinearTransform d) where
  arbitrary = scaleAround
    <$> QC.choose (-10,10)
    <*> QC.choose (1e-1,1e1)
    <*> arbitrary
instance QC.Arbitrary FDistribution where
  arbitrary = do
    df1 <- (abs <$> arbitrary) `suchThat` (>0)
    df2 <- (abs <$> arbitrary) `suchThat` (>0)
    return $ fDistribution df1 df2


instance (Arbitrary a, Ord a, RealFrac a) => Arbitrary (PValue a) where
  arbitrary = do
    -- keep only the fractional part so the value lands in [0,1)
    (_::Int, frac) <- fmap properFraction arbitrary
    return $ mkPValue $ abs frac

instance (Arbitrary a, Ord a, RealFrac a) => Arbitrary (CL a) where
  arbitrary = do
    -- keep only the fractional part so the significance lands in [0,1)
    (_::Int, frac) <- fmap properFraction arbitrary
    return $ mkCLFromSignificance $ abs frac

instance Arbitrary a => Arbitrary (NormalErr a) where
  arbitrary = fmap NormalErr arbitrary

instance Arbitrary a => Arbitrary (ConfInt a) where
  arbitrary = ConfInt <$> arbitrary <*> arbitrary <*> arbitrary

instance (Arbitrary (e a), Arbitrary a) => Arbitrary (Estimate e a) where
  arbitrary = Estimate <$> arbitrary <*> arbitrary

instance (Arbitrary a) => Arbitrary (UpperLimit a) where
  arbitrary = UpperLimit <$> arbitrary <*> arbitrary

instance (Arbitrary a) => Arbitrary (LowerLimit a) where
  arbitrary = LowerLimit <$> arbitrary <*> arbitrary

instance QC.Arbitrary DiscreteUniform where
  arbitrary = do
    a <- QC.choose (1,1000)
    b <- QC.choose (1,1000)
    return $ discreteUniformAB a b
--------------------------------------------------------------------------------
/tests/Tests/Parametric.hs:
--------------------------------------------------------------------------------
module Tests.Parametric (tests) where

import Data.Maybe (fromJust)
import Statistics.Test.StudentT
import Statistics.Types
import qualified Data.Vector.Unboxed as U
import Test.Tasty (testGroup)
import Tests.Helpers (testEquality)
import qualified Test.Tasty as Tst

tests :: Tst.TestTree
tests = testGroup "Parametric tests" studentTTests

-- 2 samples x 20 obs data
--
-- Both samples are samples from normal distributions with the same variance (= 1.0),
-- but their means are different (0.0 and 0.5, respectively).
--
-- You can reproduce the data with R (3.1.0) as follows:
--   set.seed(0)
--   sample1 = rnorm(20)
--   sample2 = rnorm(20, 0.5)
--   student = t.test(sample1, sample2, var.equal=T)
--   welch = t.test(sample1, sample2)
--   paired = t.test(sample1, sample2, paired=T)
sample1, sample2 :: U.Vector Double
sample1 = U.fromList [
  1.262954284880793e+00,
  -3.262333607056494e-01,
  1.329799262922501e+00,
  1.272429321429405e+00,
  4.146414344564082e-01,
  -1.539950041903710e+00,
  -9.285670347135381e-01,
  -2.947204467905602e-01,
  -5.767172747536955e-03,
  2.404653388857951e+00,
  7.635934611404596e-01,
  -7.990092489893682e-01,
  -1.147657009236351e+00,
  -2.894615736882233e-01,
  -2.992151178973161e-01,
  -4.115108327950670e-01,
  2.522234481561323e-01,
  -8.919211272845686e-01,
  4.356832993557186e-01,
  -1.237538421929958e+00]
sample2 = U.fromList [
  2.757321147216907e-01,
  8.773956459817011e-01,
  6.333363608148415e-01,
  1.304189509744908e+00,
  4.428932256161913e-01,
  1.003607972233726e+00,
  1.585769362145687e+00,
  -1.909538396968303e-01,
  -7.845993538721883e-01,
  5.467261721883520e-01,
  2.642934435604988e-01,
  -4.288825501025439e-02,
  6.668968254321778e-02,
  -1.494716467962331e-01,
  1.226750747385451e+00,
  1.651911754087200e+00,
  1.492160365445798e+00,
  7.048689050811874e-02,
  1.738304100853380e+00,
  2.206537181457307e-01]


-- | Pin a test's p-value against the reference value @pVal@ computed
-- by R: the result must not be significant at exactly @pVal@, but
-- must become significant once the threshold is nudged just above it
-- (i.e. the computed p-value lies in the interval (pVal, pVal+1e-5]).
testTTest :: String
          -> PValue Double
          -> Test d
          -> [Tst.TestTree]
testTTest name pVal test =
  [ testEquality name (isSignificant pVal test) NotSignificant
  , testEquality name (isSignificant (mkPValue $ pValue pVal + 1e-5) test)
      Significant
  ]

studentTTests :: [Tst.TestTree]
studentTTests = concat
  [ -- R: t.test(sample1, sample2, alt="two.sided", var.equal=T)
    testTTest "two-sample t-test SamplesDiffer Student"
      (mkPValue 0.03410) (fromJust $ studentTTest SamplesDiffer sample1 sample2)
    -- R: t.test(sample1, sample2, alt="two.sided", var.equal=F)
  , testTTest "two-sample t-test SamplesDiffer Welch"
      (mkPValue 0.03483) (fromJust $ welchTTest SamplesDiffer sample1 sample2)
    -- R: t.test(sample1, sample2, alt="two.sided", paired=T)
  , testTTest "two-sample t-test SamplesDiffer Paired"
      (mkPValue 0.03411) (fromJust $ pairedTTest SamplesDiffer sample12)
    -- R: t.test(sample1, sample2, alt="less", var.equal=T)
  , testTTest "two-sample t-test BGreater Student"
      (mkPValue 0.01705) (fromJust $ studentTTest BGreater sample1 sample2)
    -- R: t.test(sample1, sample2, alt="less", var.equal=F)
  , testTTest "two-sample t-test BGreater Welch"
      (mkPValue 0.01741) (fromJust $ welchTTest BGreater sample1 sample2)
    -- R: t.test(sample1, sample2, alt="less", paired=T)
    -- (comment fixed: this case calls pairedTTest, so paired=T, not paired=F)
  , testTTest "two-sample t-test BGreater Paired"
      (mkPValue 0.01705) (fromJust $ pairedTTest BGreater sample12)
  ]
  where sample12 = U.zip sample1 sample2
--------------------------------------------------------------------------------
/tests/Tests/Quantile.hs:
--------------------------------------------------------------------------------
{-#
LANGUAGE ViewPatterns #-}
-- |
-- Tests for quantile
module Tests.Quantile (tests) where

import Control.Exception
import qualified Data.Vector.Unboxed as U
import Test.Tasty
import Test.Tasty.HUnit
import Test.Tasty.QuickCheck hiding (sample)
import Numeric.MathFunctions.Comparison (ulpDelta,ulpDistance)
import Statistics.Quantile

tests :: TestTree
tests = testGroup "Quantiles"
  [ testCase "R alg. 4" $ compareWithR cadpw (0.00, 0.50, 2.50, 8.25, 10.00)
  , testCase "R alg. 5" $ compareWithR hazen (0.00, 1.00, 5.00, 9.00, 10.00)
  , testCase "R alg. 6" $ compareWithR spss (0.00, 0.75, 5.00, 9.25, 10.00)
  , testCase "R alg. 7" $ compareWithR s (0.000, 1.375, 5.000, 8.625,10.00)
  , testCase "R alg. 8" $ compareWithR medianUnbiased
      (0.0, 0.9166666666666667, 5.000000000000003, 9.083333333333334, 10.0)
  , testCase "R alg. 9" $ compareWithR normalUnbiased
      (0.0000, 0.9375, 5.0000, 9.0625, 10.0000)
  , testProperty "alg 7." propWeightedAverage
    -- Test failures
  , testCase "weightedAvg should throw errors" $ do
      let xs  = U.fromList [1,2,3]
          xs0 = U.fromList []
      shouldError "Empty sample" $ weightedAvg 1 4 xs0
      shouldError "N=0" $ weightedAvg 1 0 xs
      shouldError "N=1" $ weightedAvg 1 1 xs
      shouldError "k<0" $ weightedAvg (-1) 4 xs
      shouldError "k>N" $ weightedAvg 5 4 xs
  , testCase "quantile should throw errors" $ do
      let xs  = U.fromList [1,2,3]
          xs0 = U.fromList []
      shouldError "Empty xs" $ quantile s 1 4 xs0
      shouldError "N=0" $ quantile s 1 0 xs
      shouldError "N=1" $ quantile s 1 1 xs
      shouldError "k<0" $ quantile s (-1) 4 xs
      shouldError "k>N" $ quantile s 5 4 xs
    --
  , testProperty "quantiles are OK" propQuantiles
  , testProperty "quantilesVec are OK" propQuantilesVec
  ]

-- Fixed reference sample used by the R-comparison test cases above.
sample :: U.Vector Double
sample = U.fromList [0, 1, 2.5, 7.5, 9, 10]

-- Compare quantiles implementation with reference R implementation
-- (expected quartiles q0..q4 computed in R for the same algorithm).
compareWithR :: ContParam -> (Double,Double,Double,Double,Double) -> Assertion
compareWithR p (q0,q1,q2,q3,q4) = do
  assertEqual "Q 0" q0 $ quantile p 0 4 sample
  assertEqual "Q 1" q1 $ quantile p 1 4 sample
  assertEqual "Q 2" q2 $ quantile p 2 4 sample
  assertEqual "Q 3" q3 $ quantile p 3 4 sample
  assertEqual "Q 4" q4 $ quantile p 4 4 sample

-- | @weightedAvg@ must agree (up to a few ULPs) with @quantile s@.
-- NOTE: renamed from the misspelled @propWeigtedAverage@; the module
-- exports only 'tests', so the rename is interface-safe.
propWeightedAverage :: Positive Int -> Positive Int -> Property
propWeightedAverage (Positive k) (Positive q) =
  (q >= 2 && k <= q) ==> let q1 = weightedAvg k q sample
                             q2 = quantile s k q sample
                         in counterexample ("weightedAvg = " ++ show q1)
                          $ counterexample ("quantile = " ++ show q2)
                          $ counterexample ("delta in ulps = " ++ show (ulpDelta q1 q2))
                          $ ulpDistance q1 q2 <= 16

-- | 'quantiles' must agree with computing each quantile separately.
propQuantiles :: Positive Int -> Int -> Int -> NonEmptyList Double -> Property
propQuantiles (Positive n)
              ((`mod` n) -> k1)
              ((`mod` n) -> k2)
              (NonEmpty xs)
  = n >= 2
  ==> [x1,x2] == quantiles s [k1,k2] n rndXs
  where
    rndXs = U.fromList xs
    x1 = quantile s k1 n rndXs
    x2 = quantile s k2 n rndXs

-- | 'quantilesVec' must agree with computing each quantile separately.
propQuantilesVec :: Positive Int -> Int -> Int -> NonEmptyList Double -> Property
propQuantilesVec (Positive n)
                 ((`mod` n) -> k1)
                 ((`mod` n) -> k2)
                 (NonEmpty xs)
  = n >= 2
  ==> U.fromList [x1,x2] == quantilesVec s (U.fromList [k1,k2]) n rndXs
  where
    rndXs = U.fromList xs
    x1 = quantile s k1 n rndXs
    x2 = quantile s k2 n rndXs


-- | Assert that forcing @x@ raises a call to 'error'.
shouldError :: String -> a -> Assertion
shouldError nm x = do
  r <- try (evaluate x)
  case r of
    Left (ErrorCall{}) -> return ()
    Right _ -> assertFailure ("Should call error: " ++ nm)
--------------------------------------------------------------------------------
/tests/Tests/Serialization.hs:
--------------------------------------------------------------------------------
-- |
-- Tests for data serialization instances
module Tests.Serialization where

import Data.Binary (Binary,decode,encode)
import Data.Aeson (FromJSON,ToJSON,Result(..),toJSON,fromJSON)
import Data.Typeable

import Statistics.Distribution.Beta (BetaDistribution)
import Statistics.Distribution.Binomial (BinomialDistribution)
import Statistics.Distribution.CauchyLorentz
import Statistics.Distribution.ChiSquared (ChiSquared)
import Statistics.Distribution.Exponential (ExponentialDistribution)
import Statistics.Distribution.FDistribution (FDistribution)
import Statistics.Distribution.Gamma (GammaDistribution)
import Statistics.Distribution.Geometric
import Statistics.Distribution.Hypergeometric
import Statistics.Distribution.Laplace (LaplaceDistribution)
import Statistics.Distribution.Lognormal (LognormalDistribution)
import Statistics.Distribution.NegativeBinomial (NegativeBinomialDistribution)
import Statistics.Distribution.Normal
(NormalDistribution)
import Statistics.Distribution.Poisson (PoissonDistribution)
import Statistics.Distribution.StudentT
import Statistics.Distribution.Transform (LinearTransform)
import Statistics.Distribution.Uniform (UniformDistribution)
import Statistics.Distribution.Weibull (WeibullDistribution)
import Statistics.Types

import Test.Tasty (TestTree, testGroup)
import Test.Tasty.QuickCheck (testProperty)
import Test.QuickCheck as QC

import Tests.Helpers
import Tests.Orphanage ()


tests :: TestTree
tests = testGroup "Test for data serialization"
  [ serializationTests (T :: T (CL Float))
  , serializationTests (T :: T (CL Double))
  , serializationTests (T :: T (PValue Float))
  , serializationTests (T :: T (PValue Double))
  , serializationTests (T :: T (NormalErr Double))
  , serializationTests (T :: T (ConfInt Double))
  , serializationTests' "T (Estimate NormalErr Double)" (T :: T (Estimate NormalErr Double))
  , serializationTests' "T (Estimate ConfInt Double)" (T :: T (Estimate ConfInt Double))
  , serializationTests (T :: T (LowerLimit Double))
  , serializationTests (T :: T (UpperLimit Double))
    -- Distributions
  , serializationTests (T :: T BetaDistribution )
  , serializationTests (T :: T CauchyDistribution )
  , serializationTests (T :: T ChiSquared )
  , serializationTests (T :: T ExponentialDistribution )
  , serializationTests (T :: T GammaDistribution )
  , serializationTests (T :: T LaplaceDistribution )
  , serializationTests (T :: T LognormalDistribution )
  , serializationTests (T :: T NegativeBinomialDistribution )
  , serializationTests (T :: T NormalDistribution )
  , serializationTests (T :: T UniformDistribution )
  , serializationTests (T :: T WeibullDistribution )
  , serializationTests (T :: T StudentT )
  , serializationTests (T :: T (LinearTransform NormalDistribution))
  , serializationTests (T :: T FDistribution )
  , serializationTests (T :: T BinomialDistribution )
  , serializationTests (T :: T GeometricDistribution )
  , serializationTests (T :: T GeometricDistribution0 )
  , serializationTests (T :: T HypergeometricDistribution )
  , serializationTests (T :: T PoissonDistribution )
  ]


-- | Round-trip tests for a type, named after the type via 'Typeable'.
serializationTests
  :: (Eq a, Typeable a, Binary a, Show a, Read a, ToJSON a, FromJSON a, Arbitrary a)
  => T a -> TestTree
serializationTests t = serializationTests' (typeName t) t

-- Not all types are Typeable, unfortunately
serializationTests'
  :: (Eq a, Binary a, Show a, Read a, ToJSON a, FromJSON a, Arbitrary a)
  => String -> T a -> TestTree
serializationTests' name t = testGroup ("Tests for: " ++ name)
  [ testProperty "show/read" (p_showRead t)
  , testProperty "binary" (p_binary t)
  , testProperty "aeson" (p_aeson t)
  ]



-- decode . encode must be the identity (Data.Binary round trip)
p_binary :: (Eq a, Binary a) => T a -> a -> Bool
p_binary _ a = a == (decode . encode) a

-- read . show must be the identity
p_showRead :: (Eq a, Read a, Show a) => T a -> a -> Bool
p_showRead _ a = a == (read . show) a

-- fromJSON . toJSON must succeed and be the identity (aeson round trip)
p_aeson :: (Eq a, ToJSON a, FromJSON a) => T a -> a -> Bool
p_aeson _ a = Data.Aeson.Success a == (fromJSON .
toJSON) a
--------------------------------------------------------------------------------
/tests/Tests/Transform.hs:
--------------------------------------------------------------------------------
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ViewPatterns #-}
module Tests.Transform
    (
      tests
    ) where

import Data.Bits ((.&.), shiftL)
import Data.Complex (Complex((:+)))
import Numeric.Sum (kbn, sumVector)
import Statistics.Function (within)
import Statistics.Transform (CD, dct, fft, idct, ifft)
import Test.Tasty (TestTree, testGroup)
import Test.Tasty.QuickCheck (testProperty)
import Test.QuickCheck ( Positive(..), Arbitrary(..), Blind(..), (==>), Gen
                       , choose, vectorOf, counterexample, forAll)
import Test.QuickCheck.Property (Property(..))
import Tests.Helpers (testAssertion)
import Text.Printf (printf)
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Unboxed as U


tests :: TestTree
tests = testGroup "fft" [
    testProperty "t_impulse" t_impulse
  , testProperty "t_impulse_offset" t_impulse_offset
  , testProperty "ifft . fft = id" (t_fftInverse $ ifft . fft)
  , testProperty "fft . ifft = id" (t_fftInverse $ fft . ifft)
  , testProperty "idct . dct = id [up to scale]"
      (t_fftInverse (\v -> U.map (/ (2 * fromIntegral (U.length v))) $ idct $ dct v))
    -- BUG FIX: this property previously round-tripped @idct . dct@
    -- again (a duplicate of the property above); it now actually
    -- tests the @dct . idct@ direction it names.  Both compositions
    -- scale by 2N, so the normalisation factor is unchanged.
  , testProperty "dct . idct = id [up to scale]"
      (t_fftInverse (\v -> U.map (/ (2 * fromIntegral (U.length v))) $ dct $ idct v))
    -- Exact small size DCT
    -- 1
  , testDCT [1] $ [2]
    -- 2
  , testDCT [1,0] $ map (*2) [1, cos (pi/4) ]
  , testDCT [0,1] $ map (*2) [1, cos (3*pi/4) ]
    -- 4
  , testDCT [1,0,0,0] $ map (*2) [1, cos( pi/8), cos( 2*pi/8), cos( 3*pi/8)]
  , testDCT [0,1,0,0] $ map (*2) [1, cos(3*pi/8), cos( 6*pi/8), cos( 9*pi/8)]
  , testDCT [0,0,1,0] $ map (*2) [1, cos(5*pi/8), cos(10*pi/8), cos(15*pi/8)]
  , testDCT [0,0,0,1] $ map (*2) [1, cos(7*pi/8), cos(14*pi/8), cos(21*pi/8)]
    -- Exact small size IDCT
    -- 1
  , testIDCT [1] [1]
    -- 2
  , testIDCT [1,0] [1, 1 ]
  , testIDCT [0,1] $ map (*2) [cos(pi/4), cos(3*pi/4)]
    -- 4
  , testIDCT [1,0,0,0] [1, 1, 1, 1 ]
  , testIDCT [0,1,0,0] $ map (*2) [cos( pi/8), cos( 3*pi/8), cos( 5*pi/8), cos( 7*pi/8) ]
  , testIDCT [0,0,1,0] $ map (*2) [cos( 2*pi/8), cos( 6*pi/8), cos(10*pi/8), cos(14*pi/8) ]
  , testIDCT [0,0,0,1] $ map (*2) [cos( 3*pi/8), cos( 9*pi/8), cos(15*pi/8), cos(21*pi/8) ]
  ]

-- A single real-valued impulse at the beginning of an otherwise zero
-- vector should be replicated in every real component of the result,
-- and all the imaginary components should be zero.
t_impulse :: Double -> Positive Int -> Bool
t_impulse k (Positive m) = U.all (c_near i) (fft v)
  where v = i `G.cons` G.replicate (n-1) 0
        i = k :+ 0
        -- keep the FFT size a small power of two (<= 2^6)
        n = 1 `shiftL` (m .&. 6)

-- If a real-valued impulse is offset from the beginning of an
-- otherwise zero vector, the sum-of-squares of each component of the
-- result should equal the square of the impulse.
t_impulse_offset :: Double -> Positive Int -> Positive Int -> Property
t_impulse_offset k (Positive x) (Positive m)
  -- For numbers smaller than 1e-162 their square underflows and test
  -- fails spuriously
  = abs k >= 1e-100 ==> U.all ok (fft v)
  where v = G.concat [G.replicate xn 0, G.singleton i, G.replicate (n-xn-1) 0]
        ok (re :+ im) = within ulps (re*re + im*im) (k*k)
        i = k :+ 0
        xn = x `rem` n
        n = 1 `shiftL` (m .&. 6)

-- Test that (ifft . fft ≈ id)
--
-- Approximate equality here is tricky. Smaller values of vector tend
-- to have large relative error. Thus we should test that vectors as
-- whole are approximate equal.
t_fftInverse :: (HasNorm (U.Vector a), U.Unbox a, Num a, Show a, Arbitrary a)
             => (U.Vector a -> U.Vector a) -> Property
t_fftInverse roundtrip =
  forAll (Blind <$> genFftVector) $ \(Blind x) ->
    let n = G.length x
        x' = roundtrip x
        d = G.zipWith (-) x x'
        nd = vectorNorm d
        nx = vectorNorm x
    in counterexample "Original vector"
     $ counterexample (show x )
     $ counterexample "Transformed one"
     $ counterexample (show x')
     $ counterexample (printf "Length = %i" n)
     $ counterexample (printf "|x - x'| / |x| = %.6g" (nd / nx))
     $ nd <= 3e-14 * nx

-- Test discrete cosine transform
testDCT :: [Double] -> [Double] -> TestTree
testDCT (U.fromList -> vec) (U.fromList -> res)
  = testAssertion ("DCT test for " ++ show vec)
  $ vecEqual 3e-14 (dct vec) res

-- Test inverse discrete cosine transform
testIDCT :: [Double] -> [Double] -> TestTree
testIDCT (U.fromList -> vec) (U.fromList -> res)
  = testAssertion ("IDCT test for " ++ show vec)
  $ vecEqual 3e-14 (idct vec) res



----------------------------------------------------------------

-- With an error tolerance of 8 ULPs, a million QuickCheck tests are
-- likely to all succeed. With a tolerance of 7, we fail around the
-- half million mark.
ulps :: Int
ulps = 8

c_near :: CD -> CD -> Bool
c_near (a :+ b) (c :+ d) = within ulps a c && within ulps b d

-- Arbitrary vector for FFT or DCT
genFftVector :: (U.Unbox a, Arbitrary a) => Gen (U.Vector a)
genFftVector = do
  n <- (2^) <$> choose (1,9::Int) -- Size of vector
  G.fromList <$> vectorOf n arbitrary -- Vector to transform

-- Ad-hoc type class for calculation of vector norm
class HasNorm a where
  vectorNorm :: a -> Double

instance HasNorm (U.Vector Double) where
  vectorNorm = sqrt . sumVector kbn . U.map (\x -> x*x)

instance HasNorm (U.Vector CD) where
  vectorNorm = sqrt . sumVector kbn . U.map (\(x :+ y) -> x*x + y*y)

-- Approximate equality for vectors
vecEqual :: Double -> U.Vector Double -> U.Vector Double -> Bool
vecEqual ε v u
  = vectorNorm (U.zipWith (-) v u) < ε * vectorNorm v
--------------------------------------------------------------------------------
/tests/doctest.hs:
--------------------------------------------------------------------------------
import Test.DocTest (doctest)

main :: IO ()
main = doctest ["-XHaskell2010", "Statistics"]

--------------------------------------------------------------------------------
/tests/tests.hs:
--------------------------------------------------------------------------------
import Test.Tasty (defaultMain,testGroup)

import qualified Tests.Distribution
import qualified Tests.Function
import qualified Tests.KDE
import qualified Tests.Matrix
import qualified Tests.NonParametric
import qualified Tests.Parametric
import qualified Tests.Transform
import qualified Tests.Correlation
import qualified Tests.Serialization
import qualified Tests.Quantile

main :: IO ()
main = defaultMain $
testGroup "statistics" 16 | [ Tests.Distribution.tests 17 | , Tests.Function.tests 18 | , Tests.KDE.tests 19 | , Tests.Matrix.tests 20 | , Tests.NonParametric.tests 21 | , Tests.Parametric.tests 22 | , Tests.Transform.tests 23 | , Tests.Correlation.tests 24 | , Tests.Serialization.tests 25 | , Tests.Quantile.tests 26 | ] 27 | -------------------------------------------------------------------------------- /tests/utils/Makefile: -------------------------------------------------------------------------------- 1 | C = gcc 2 | CFLAGS = -W -Wall -O2 -std=c99 3 | LDFLAGS = -lfftw3 4 | 5 | .PHONY: all clean 6 | 7 | all : fftw 8 | clean : 9 | rm -rf fftw *.o 10 | -------------------------------------------------------------------------------- /tests/utils/fftw.c: -------------------------------------------------------------------------------- 1 | /* Generate some test cases using fftw3 */ 2 | #include 3 | #include 4 | #include 5 | 6 | void dump_vector(int n, double* vec) { 7 | for(int i = 0; i < n; i++) 8 | printf("%20.15f ", vec[i]); 9 | printf("\n"); 10 | } 11 | 12 | void dct(int flag, int n) { 13 | double* in = malloc( n * sizeof(double)); 14 | double* out = malloc( n * sizeof(double)); 15 | // 16 | fftw_plan plan = fftw_plan_r2r_1d(n, in, out, flag, FFTW_ESTIMATE); 17 | for( int k = 0; k < n; k++) { 18 | // Init input vector 19 | for( int i = 0; i < n; i++) 20 | in[i] = 0; 21 | in[k] = 1; 22 | // Perform DFT 23 | fftw_execute(plan); 24 | // Print results 25 | dump_vector(n, in ); 26 | dump_vector(n, out); 27 | printf("\n"); 28 | } 29 | // 30 | free(in); 31 | free(out); 32 | fftw_destroy_plan(plan); 33 | } 34 | 35 | int main(void) 36 | { 37 | printf("DCT II (the DCT)\n"); 38 | dct( FFTW_REDFT10, 2); 39 | dct( FFTW_REDFT10, 4); 40 | 41 | printf("DCT III (Inverse DCT)\n"); 42 | dct( FFTW_REDFT01, 2); 43 | dct( FFTW_REDFT01, 4); 44 | 45 | return 0; 46 | } 47 | --------------------------------------------------------------------------------