├── .gitignore ├── CHANGES ├── LICENSE ├── Makefile ├── README.md ├── _oasis ├── _tags ├── configure ├── examples ├── base_ex.ml ├── common.ml ├── distributions_ex.ml ├── resampling_ex.ml ├── sample_ex.ml └── tests_ex.ml ├── lib ├── API.odocl ├── META ├── base.ml ├── base.mli ├── distributions.ml ├── distributions.mli ├── internal.ml ├── pareto.mldylib ├── pareto.mllib ├── pareto.mlpack ├── resampling.ml ├── resampling.mli ├── sample.ml ├── sample.mli ├── tests.ml └── tests.mli ├── lib_test ├── common.ml ├── distributions_test.ml ├── sample_test.ml ├── test.ml ├── test_runner.ml └── tests_test.ml ├── myocamlbuild.ml ├── opam └── setup.ml /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | setup.log 3 | setup.data 4 | *.native 5 | *.byte 6 | *.docdir 7 | *.ba* 8 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | pareto Changelog 2 | ================ 3 | 4 | Here you can see the full list of changes between each pareto release. 5 | 6 | Version 0.3 7 | ----------- 8 | 9 | Released on March 29th, 2016 10 | 11 | - Added MLE for 'Gamma' distribution. 12 | - Added log-probability and log-density to all distributions. 13 | - Added a method for sampling a single value from a distribution, see 14 | 'BaseDistribution.random'. 15 | - Changed the observation label in 'DiscreteDistribution' from ~n to ~k. 16 | - Bumped minimum required ocaml-gsl version to 1.14.0 and dropped custom 17 | 'Combi' stubs. 18 | 19 | Version 0.2 20 | ----------- 21 | 22 | Released on July 13th, 2013 23 | 24 | - Added summary statistics, see 'Sample.Summary' module. Thanks to Nicholas 25 | Lucaroni! 26 | - Added MLE for distributions which have a closed-form MLE. Thanks to Nicholas 27 | Lucaroni, once again! For distributions, which don't have a closed-form 28 | solution, we currently provide method of moments estimates. 
Exceptions are: 29 | 'Cauchy' and 'Hypergeometric' distributions. 30 | - Added Pearson product-moment correlation and autocorrelation and Spearman 31 | rank correlation. 32 | - Added Kolmogorov-Smirnov test for goodness of fit and two-sample test. 33 | - Added function, which adjusts P-values for multiple comparisons. Currently, 34 | two procedures are supported: Benjamini-Hochberg, which controls for FDR and 35 | Holm-Bonferroni, which controls FWER. 36 | - Abstracted RV type into '*Distribution.elt'. 37 | - Added GSL wrappers for computing sample skewness and kurtosis. 38 | - Added more distribution features: skewness and kurtosis, see 39 | 'Distributions.Features' signature. 40 | - Added 'LogNormal', 'Bernoulli', 'Logistic' and 'Categorical' distributions. 41 | - Added a basic test suite against R and SciPy, thanks to Francois Berenger 42 | for the reminder. 43 | 44 | Version 0.1 45 | ----------- 46 | 47 | Initial release, released on June 11th, 2013 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Sergei Lebedev 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # OASIS_START 2 | # DO NOT EDIT (digest: a3c674b4239234cbbe53afe090018954) 3 | 4 | SETUP = ocaml setup.ml 5 | 6 | build: setup.data 7 | $(SETUP) -build $(BUILDFLAGS) 8 | 9 | doc: setup.data build 10 | $(SETUP) -doc $(DOCFLAGS) 11 | 12 | test: setup.data build 13 | $(SETUP) -test $(TESTFLAGS) 14 | 15 | all: 16 | $(SETUP) -all $(ALLFLAGS) 17 | 18 | install: setup.data 19 | $(SETUP) -install $(INSTALLFLAGS) 20 | 21 | uninstall: setup.data 22 | $(SETUP) -uninstall $(UNINSTALLFLAGS) 23 | 24 | reinstall: setup.data 25 | $(SETUP) -reinstall $(REINSTALLFLAGS) 26 | 27 | clean: 28 | $(SETUP) -clean $(CLEANFLAGS) 29 | 30 | distclean: 31 | $(SETUP) -distclean $(DISTCLEANFLAGS) 32 | 33 | setup.data: 34 | $(SETUP) -configure $(CONFIGUREFLAGS) 35 | 36 | configure: 37 | $(SETUP) -configure $(CONFIGUREFLAGS) 38 | 39 | .PHONY: build doc test all install uninstall reinstall clean distclean configure 40 | 41 | # OASIS_STOP 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![pareto](http://upload.wikimedia.org/wikipedia/commons/5/55/Vilfredo_F._D._Pareto.jpg) 2 | 3 | `pareto` is an OCaml statistics library, based on [GSL] [gsl], which provides: 4 | 5 | * Common statistical tests for 
significant differences between samples. 6 | * Uniform interface for common discrete and continuous probability distributions. 7 | * Sample statistics, quantile estimation, kernel density estimation. 8 | * Resampling methods: jackknife, BCa bootstrap. 9 | 10 | **Note**: before using `pareto` make sure you understand some of the subtleties 11 | in OCaml GSL bindings. See section **ERROR HANDLING** in GSL [README] [README]. 12 | 13 | [gsl]: http://www.gnu.org/software/gsl 14 | [README]: https://bitbucket.org/mmottl/gsl-ocaml 15 | 16 | Installation 17 | ------------ 18 | 19 | Make sure you have `gsl-ocaml`, or install it with [OPAM](http://opam.ocamlpro.com): 20 | 21 | ```bash 22 | $ opam install gsl 23 | ``` 24 | 25 | Then go with the usual OASIS routines: 26 | 27 | ```bash 28 | $ ./configure 29 | $ make # And you're done! 30 | ``` 31 | 32 | ### Examples _(optional)_ 33 | 34 | To build examples: 35 | 36 | ```bash 37 | $ ./configure --enable-examples 38 | $ make 39 | ``` 40 | 41 | Here's a simple t-test: 42 | 43 | ```ocaml 44 | open Pareto 45 | 46 | let open Distributions.Normal in 47 | let v = sample ~size:10 standard in 48 | let open Tests in 49 | let { test_statistic = t; test_pvalue } = 50 | T.one_sample v ~mean:0. 
~alternative:TwoSided () 51 | in begin 52 | printf "One-sample T-test for true mean = 0.0\n"; 53 | printf "t = %f, P-value: %f\n" t test_pvalue; 54 | end 55 | ``` 56 | 57 | ### Documentation _(optional)_ 58 | 59 | To build API documentation: 60 | 61 | ```bash 62 | $ make doc 63 | ``` 64 | 65 | ### Tests _(optional)_ [![Build Status][status-image]][status] 66 | 67 | To build and run tests: 68 | 69 | ```bash 70 | $ ./configure --enable-tests 71 | $ make test 72 | ``` 73 | 74 | [status]: https://drone.io/github.com/superbobry/pareto/latest 75 | [status-image]: https://drone.io/github.com/superbobry/pareto/status.png 76 | -------------------------------------------------------------------------------- /_oasis: -------------------------------------------------------------------------------- 1 | OASISFormat: 0.4 2 | Name: pareto 3 | Version: 0.3 4 | Synopsis: GSL powered OCaml statistics library 5 | Authors: Sergei Lebedev 6 | Maintainers: Sergei Lebedev 7 | License: MIT 8 | OCamlVersion: >= 4.0.0 9 | FindlibVersion: >= 1.3.1 10 | Homepage: https://github.com/superbobry/pareto 11 | BuildTools: ocamlbuild, ocamldoc 12 | Plugins: META (0.3), DevFiles (0.3) 13 | 14 | Flag strict 15 | Description: Strict compile-time checks 16 | Default: false 17 | 18 | Library pareto 19 | Path: lib 20 | Pack: true 21 | Modules: Base 22 | , Distributions 23 | , Resampling 24 | , Sample 25 | , Tests 26 | InternalModules: Internal 27 | if flag(strict) 28 | NativeOpt: -w @a -warn-error -a 29 | ByteOpt: -w @a -warn-error -a 30 | BuildDepends: gsl (>= 1.14.0) 31 | 32 | # Tests 33 | 34 | Executable test_runner 35 | Path: lib_test 36 | MainIs: test_runner.ml 37 | Build$: flag(tests) 38 | Custom: true 39 | CompiledObject: best 40 | Install: false 41 | BuildDepends: pareto 42 | , oUnit (>= 2.0.0) 43 | 44 | Test lib_test 45 | Run$: flag(tests) 46 | Command: $test_runner 47 | WorkingDirectory: lib_test 48 | 49 | # Examples 50 | 51 | Flag examples 52 | Description: Build examples 53 | Default: false 54 
| 55 | Executable base_ex 56 | Path: examples 57 | Build$: flag(examples) 58 | Install: false 59 | CompiledObject: best 60 | MainIs: base_ex.ml 61 | BuildDepends: gsl, pareto 62 | NativeOpt: -w @a 63 | ByteOpt: -w @a 64 | 65 | Executable distributions_ex 66 | Path: examples 67 | Build$: flag(examples) 68 | Install: false 69 | CompiledObject: best 70 | MainIs: distributions_ex.ml 71 | BuildDepends: gsl, pareto 72 | NativeOpt: -w @a 73 | ByteOpt: -w @a 74 | 75 | Executable tests_ex 76 | Path: examples 77 | Build$: flag(examples) 78 | Install: false 79 | CompiledObject: best 80 | MainIs: tests_ex.ml 81 | BuildDepends: gsl, pareto 82 | NativeOpt: -w @a 83 | ByteOpt: -w @a 84 | 85 | Executable sample_ex 86 | Path: examples 87 | Build$: flag(examples) 88 | Install: false 89 | CompiledObject: best 90 | MainIs: sample_ex.ml 91 | BuildDepends: gsl, pareto 92 | NativeOpt: -w @a 93 | ByteOpt: -w @a 94 | 95 | Executable resampling_ex 96 | Path: examples 97 | Build$: flag(examples) 98 | Install: false 99 | CompiledObject: best 100 | MainIs: resampling_ex.ml 101 | BuildDepends: gsl, pareto 102 | NativeOpt: -w @a 103 | ByteOpt: -w @a 104 | 105 | Document API 106 | Title: API reference for Pareto 107 | Type: OCamlbuild (0.3) 108 | InstallDir: $docdir/api 109 | XOCamlbuildPath: lib 110 | XOCamlbuildLibraries: pareto 111 | 112 | SourceRepository github 113 | Type: git 114 | Location: https://github.com/superbobry/pareto 115 | Browser: https://github.com/superbobry/pareto 116 | Tag: $(pkg_version) 117 | -------------------------------------------------------------------------------- /_tags: -------------------------------------------------------------------------------- 1 | # OASIS_START 2 | # DO NOT EDIT (digest: e03994bacd251b5bdf8e243abc994271) 3 | # Ignore VCS directories, you can use the same kind of rule outside 4 | # OASIS_START/STOP if you want to exclude directories that contains 5 | # useless stuff for the build process 6 | true: annot, bin_annot 7 | <**/.svn>: -traverse 8 
| <**/.svn>: not_hygienic 9 | ".bzr": -traverse 10 | ".bzr": not_hygienic 11 | ".hg": -traverse 12 | ".hg": not_hygienic 13 | ".git": -traverse 14 | ".git": not_hygienic 15 | "_darcs": -traverse 16 | "_darcs": not_hygienic 17 | # Library pareto 18 | "lib/pareto.cmxs": use_pareto 19 | "lib/base.cmx": for-pack(Pareto) 20 | "lib/distributions.cmx": for-pack(Pareto) 21 | "lib/resampling.cmx": for-pack(Pareto) 22 | "lib/sample.cmx": for-pack(Pareto) 23 | "lib/tests.cmx": for-pack(Pareto) 24 | "lib/internal.cmx": for-pack(Pareto) 25 | : oasis_library_pareto_byte 26 | : oasis_library_pareto_byte 27 | : oasis_library_pareto_native 28 | : oasis_library_pareto_native 29 | : package(gsl) 30 | # Executable test_runner 31 | : package(gsl) 32 | : package(oUnit) 33 | : use_pareto 34 | : package(gsl) 35 | : package(oUnit) 36 | : use_pareto 37 | : custom 38 | # Executable base_ex 39 | : oasis_executable_base_ex_byte 40 | : oasis_executable_base_ex_byte 41 | : oasis_executable_base_ex_native 42 | : oasis_executable_base_ex_native 43 | : package(gsl) 44 | : use_pareto 45 | # Executable distributions_ex 46 | : oasis_executable_distributions_ex_byte 47 | : oasis_executable_distributions_ex_byte 48 | : oasis_executable_distributions_ex_native 49 | : oasis_executable_distributions_ex_native 50 | : package(gsl) 51 | : use_pareto 52 | # Executable tests_ex 53 | : oasis_executable_tests_ex_byte 54 | : oasis_executable_tests_ex_byte 55 | : oasis_executable_tests_ex_native 56 | : oasis_executable_tests_ex_native 57 | : package(gsl) 58 | : use_pareto 59 | # Executable sample_ex 60 | : oasis_executable_sample_ex_byte 61 | : oasis_executable_sample_ex_byte 62 | : oasis_executable_sample_ex_native 63 | : oasis_executable_sample_ex_native 64 | : package(gsl) 65 | : use_pareto 66 | # Executable resampling_ex 67 | : oasis_executable_resampling_ex_byte 68 | : oasis_executable_resampling_ex_byte 69 | : oasis_executable_resampling_ex_native 70 | : oasis_executable_resampling_ex_native 71 | : 
package(gsl) 72 | : use_pareto 73 | : package(gsl) 74 | : use_pareto 75 | # OASIS_STOP 76 | 77 | : use_toploop 78 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # OASIS_START 4 | # DO NOT EDIT (digest: dc86c2ad450f91ca10c931b6045d0499) 5 | set -e 6 | 7 | FST=true 8 | for i in "$@"; do 9 | if $FST; then 10 | set -- 11 | FST=false 12 | fi 13 | 14 | case $i in 15 | --*=*) 16 | ARG=${i%%=*} 17 | VAL=${i##*=} 18 | set -- "$@" "$ARG" "$VAL" 19 | ;; 20 | *) 21 | set -- "$@" "$i" 22 | ;; 23 | esac 24 | done 25 | 26 | ocaml setup.ml -configure "$@" 27 | # OASIS_STOP 28 | -------------------------------------------------------------------------------- /examples/base_ex.ml: -------------------------------------------------------------------------------- 1 | open Pareto 2 | 3 | open Common 4 | 5 | 6 | let sample_shuffle () = 7 | let vs = Array.init 10 (fun i -> float_of_int i) in 8 | let svs = Base.shuffle vs in 9 | let svs_with_replacement = Base.sample ~replace:true ~size:5 vs 10 | and svs_without_replacement = Base.sample ~size:5 vs in begin 11 | print_string "Initial sample : "; 12 | print_float_array vs; 13 | print_string "Shuffled sample: "; 14 | print_float_array svs; 15 | print_endline "5-sample *with* replacement from the shuffled array:"; 16 | print_float_array svs_with_replacement; 17 | print_endline "5-sample *without* replacement from the shuffled array:"; 18 | print_float_array svs_without_replacement; 19 | print_newline () 20 | end 21 | 22 | 23 | let () = begin 24 | sample_shuffle () 25 | end 26 | -------------------------------------------------------------------------------- /examples/common.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | let print_array f vs = 4 | print_char '['; 5 | Array.iteri (fun i v -> 6 | f v; 7 | if i <> Array.length vs - 1 8 | 
then print_string ", ") vs; 9 | print_char ']'; 10 | print_newline () 11 | 12 | let print_float_array = print_array (printf "%.5f") 13 | and print_int_array = print_array (printf "%i") 14 | 15 | let random_array ?(a=0.) ?(b=100.) n = 16 | let vs = Array.make n 0. in 17 | for i = 0 to n - 1 do 18 | vs.(i) <- a +. Random.float b 19 | done; vs 20 | -------------------------------------------------------------------------------- /examples/distributions_ex.ml: -------------------------------------------------------------------------------- 1 | open Pareto.Distributions 2 | 3 | open Common 4 | 5 | 6 | let distribution_mean (type t) 7 | (module F : Features.S with type t = t and type elt = float) 8 | (d : t) () = 9 | Printf.printf "E[X] = %.4f\n" (F.mean d) 10 | 11 | let distribution_quantile (type t) 12 | (module D : ContinuousDistribution with type t = t and type elt = float) 13 | (d : t) () = 14 | Printf.printf "Q(0.5) = %.4f\n" (D.quantile ~p:0.5 d) 15 | 16 | let distribution_sample (type t) 17 | (module D : DiscreteDistribution with type t = t and type elt = int) 18 | (d : t) () = 19 | print_int_array (D.sample ~size:10 d) 20 | 21 | 22 | let () = begin 23 | distribution_mean (module Normal) (normal ~mean:0. ~sd:1.) (); 24 | distribution_quantile (module Beta) (beta ~alpha:1. 
~beta:0.5) (); 25 | distribution_sample (module Poisson) (poisson ~rate:0.5) () 26 | end 27 | -------------------------------------------------------------------------------- /examples/resampling_ex.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | open Pareto 4 | 5 | open Common 6 | 7 | let bootstrap_mean () = 8 | let open Distributions.Normal in 9 | let vs = sample ~size:100 standard in 10 | let open Resampling.Bootstrap in 11 | let { point; upper_bound; lower_bound; confidence_level } = 12 | bca ~estimator:Sample.mean ~n_iter:10000 vs 13 | in begin 14 | print_endline "Sample:"; 15 | print_float_array vs; 16 | print_endline "BCA bootstrapped estimate of sample mean:"; 17 | printf "%.5f %i%% CI %.5f %.5f\n" 18 | point (int_of_float (confidence_level *. 100.)) lower_bound upper_bound; 19 | print_newline () 20 | end 21 | 22 | 23 | let () = begin 24 | bootstrap_mean () 25 | end 26 | -------------------------------------------------------------------------------- /examples/sample_ex.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | open Pareto 4 | 5 | open Common 6 | 7 | let print_histogram (points, counts) = 8 | Array.iteri (fun i count -> 9 | printf "%9.5f " points.(i); 10 | for _i = 0 to int_of_float count do 11 | print_char '*'; 12 | done; 13 | 14 | print_newline (); 15 | ) counts 16 | 17 | and print_density (points, pdf) = 18 | Array.iteri (fun i d -> 19 | let count = int_of_float (d *. 20.) in 20 | printf "%9.5f " points.(i); 21 | for i = 0 to count do 22 | print_char (if i = count then '.' 
else ' '); 23 | done; 24 | 25 | print_newline (); 26 | ) pdf 27 | 28 | 29 | let sample_histogram () = 30 | let open Distributions.Normal in 31 | let vs = sample ~size:100 standard in 32 | let (points, counts) = Sample.histogram ~n_bins:10 vs in begin 33 | print_endline "Normal sample histogram"; 34 | print_float_array counts; 35 | print_histogram (points, counts); 36 | print_newline () 37 | end 38 | 39 | let sample_kde () = 40 | let open Distributions.Normal in 41 | let vs = sample ~size:100 standard in 42 | let (points, pdf) = 43 | let open Sample.KDE in 44 | estimate_pdf ~kernel:Gaussian ~bandwidth:Silverman ~n_points:10 vs 45 | in begin 46 | print_endline "Normal sample (Gaussian) KDE"; 47 | print_float_array pdf; 48 | print_density (points, pdf); 49 | print_newline () 50 | end 51 | 52 | let sample_quantiles () = 53 | let vs = random_array 10 54 | and ps = [|0.; 0.25; 0.75; 1.|] in begin 55 | print_float_array vs; 56 | print_float_array ps; 57 | print_float_array (Sample.quantile ~ps vs); 58 | print_newline () 59 | end 60 | 61 | let sample_iqr () = 62 | let vs = random_array 10 in begin 63 | print_float_array vs; 64 | printf " IQR: %9.5f\n" (Sample.iqr vs); 65 | print_newline () 66 | end 67 | 68 | let sample_ranks () = 69 | let vs = random_array 10 in 70 | let (_t, ranks) = Sample.rank vs in begin 71 | print_endline "Sample ranks"; 72 | print_float_array vs; 73 | print_float_array ranks; 74 | print_newline () 75 | end 76 | 77 | let sample_correlation () = 78 | let vs1 = random_array 10 79 | and vs2 = random_array 10 in begin 80 | print_float_array vs1; 81 | print_float_array vs2; 82 | printf "Pearson product-momentum correlation: %f\n" 83 | (Sample.Correlation.pearson vs1 vs2); 84 | printf "Spearman rank-correlation: %f\n" 85 | (Sample.Correlation.spearman vs1 vs2); 86 | print_newline () 87 | end 88 | 89 | let sample_autocorrelation () = 90 | let vs = random_array 10 in begin 91 | print_float_array vs; 92 | print_endline "Pearson product-momentum 
autocorrelation:"; 93 | print_float_array (Sample.Correlation.Auto.pearson vs); 94 | print_newline () 95 | end 96 | 97 | let sample_summary () = 98 | let vs = random_array 10 in 99 | let open Sample in 100 | let s = Summary.(Array.fold_left add empty vs) in 101 | begin 102 | print_float_array vs; 103 | printf " kurtosis = %f, skewness = %f\n" (kurtosis vs) (skewness vs); 104 | printf "Summary kurtosis = %f, skewness = %f\n" 105 | (Summary.kurtosis s) (Summary.skewness s); 106 | print_newline () 107 | end 108 | 109 | 110 | let () = begin 111 | sample_histogram (); 112 | sample_kde (); 113 | sample_quantiles (); 114 | sample_iqr (); 115 | sample_ranks (); 116 | sample_correlation (); 117 | sample_autocorrelation (); 118 | sample_summary () 119 | end 120 | -------------------------------------------------------------------------------- /examples/tests_ex.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | open Pareto 4 | open Pareto.Tests 5 | 6 | open Common 7 | 8 | let t_test_one_sample () = 9 | let open Distributions.Normal in 10 | let v = sample ~size:10 standard in 11 | let { test_statistic = t; test_pvalue } = 12 | T.one_sample v ~mean:0. 
~alternative:TwoSided () 13 | in begin 14 | printf "One-sample T-test for true mean = 0.0\n"; 15 | print_float_array v; 16 | printf "t = %f, P-value: %f\n" t test_pvalue; 17 | print_newline () 18 | end 19 | 20 | let t_test_two_sample_independent () = 21 | let open Distributions.Normal in 22 | let v1 = sample ~size:10 standard in 23 | let v2 = sample ~size:10 standard in 24 | let { test_statistic = t; test_pvalue } = 25 | T.two_sample_independent v1 v2 26 | ~mean:0.1 ~equal_variance:false ~alternative:TwoSided () 27 | in begin 28 | printf "Two-sample T-test for mean difference not equal to 0.1\n"; 29 | print_float_array v1; 30 | print_float_array v2; 31 | printf "t = %f, P-value: %f\n" t test_pvalue; 32 | print_newline () 33 | end 34 | 35 | let t_test_two_sample_paired () = 36 | let open Distributions.Normal in 37 | let v1 = sample ~size:10 standard in 38 | let v2 = Array.mapi (fun i x -> x +. v1.(i)) (sample ~size:10 standard) in 39 | let { test_statistic = t; test_pvalue } = T.two_sample_paired v1 v2 40 | ~mean:0.1 ~alternative:TwoSided () 41 | in begin 42 | printf "Paired two-sample T-test for mean difference not equal to 0.1\n"; 43 | print_float_array v1; 44 | print_float_array v2; 45 | printf "t = %f, P-value: %f\n" t test_pvalue; 46 | print_newline () 47 | end 48 | 49 | let chisq_test_gof () = 50 | let open Distributions.Uniform in 51 | let v = sample ~size:10 (create ~lower:0. ~upper:1.) in 52 | let { test_statistic = chisq; test_pvalue } = 53 | ChiSquared.goodness_of_fit v () 54 | in begin 55 | print_endline "X^2 test for goodness of fit"; 56 | print_float_array v; 57 | printf "X^2 = %f, P-value: %f\n" chisq test_pvalue; 58 | print_newline () 59 | end 60 | 61 | let chisq_test_independence () = 62 | let open Distributions.Uniform in 63 | let d = create ~lower:0. ~upper:1. 
in 64 | let v1 = sample ~size:10 d in 65 | let v2 = sample ~size:10 d in 66 | let { test_statistic = chisq; test_pvalue } = 67 | ChiSquared.independence [|v1; v2|] ~correction:true () 68 | in begin 69 | print_endline "X^2 test for independence with Yates' continuity correction\n"; 70 | print_float_array v1; 71 | print_float_array v2; 72 | printf "X^2 = %f, P-value: %f\n" chisq test_pvalue; 73 | print_newline () 74 | end 75 | 76 | let mann_whitney_wilcoxon () = 77 | let v1 = [|11; 1; -1; 2; 0|] in 78 | let v2 = [|-5; 9; 5; 8; 4|] in 79 | let { test_statistic = u; test_pvalue } = 80 | MannWhitneyU.two_sample_independent v1 v2 81 | ~correction:true ~alternative:TwoSided () 82 | in begin 83 | printf "Two-sample Mann-Whitney U test\n"; 84 | print_int_array v1; 85 | print_int_array v2; 86 | printf "U = %f, P-value: %f\n" u test_pvalue; 87 | print_newline () 88 | end 89 | 90 | let wilcoxon_signed_rank_one_sample () = 91 | let vs = [|11.; 1.; -1.; 2.; 0.|] in 92 | let { test_statistic = w; test_pvalue } = 93 | WilcoxonT.one_sample vs 94 | ~shift:1. ~correction:true ~alternative:Greater () 95 | in begin 96 | printf "Wilcoxon signed rank test with continuity correction\n"; 97 | print_float_array vs; 98 | printf "W = %f, P-value: %f\n" w test_pvalue; 99 | print_newline () 100 | end 101 | 102 | let wilcoxon_signed_rank_paired () = 103 | let v1 = [|11.; 1.; -1.; 2.; 0.|] in 104 | let v2 = [|-5.; 9.; 5.; 8.; 4.|] in 105 | let { test_statistic = w; test_pvalue } = 106 | WilcoxonT.two_sample_paired v1 v2 107 | ~correction:true ~alternative:Less () 108 | in begin 109 | print_endline ("Two-sample paired Wilcoxon signed rank test with " ^ 110 | "continuity correction"); 111 | print_float_array v1; 112 | print_float_array v2; 113 | printf "W = %f, P-value: %f\n" w test_pvalue; 114 | print_newline () 115 | end 116 | 117 | let sign_one_sample () = 118 | let vs = [|11.; 1.; -1.; 2.; 0.|] in 119 | let { test_statistic = pi_plus; test_pvalue } = 120 | Sign.one_sample vs ~shift:1. 
~alternative:TwoSided () 121 | in begin 122 | printf "One-sample Sign test\n"; 123 | print_float_array vs; 124 | printf "π+ = %f, P-value: %f\n" pi_plus test_pvalue; 125 | print_newline () 126 | end 127 | 128 | let sign_paired () = 129 | let v1 = [|11.; 1.; -1.; 2.; 0.|] in 130 | let v2 = [|-5.; 9.; 5.; 8.; 4.|] in 131 | let { test_statistic = pi_plus; test_pvalue } = 132 | Sign.two_sample_paired v1 v2 ~alternative:TwoSided () 133 | in begin 134 | printf "Two-sample Sign test\n"; 135 | print_float_array v1; 136 | print_float_array v2; 137 | printf "π+ = %f, P-value: %f\n" pi_plus test_pvalue; 138 | print_newline () 139 | end 140 | 141 | let ks_gof () = 142 | let open Distributions.Normal in 143 | let v = sample ~size:10 standard in 144 | let { test_statistic = d; test_pvalue } = 145 | KolmogorovSmirnov.goodness_of_fit v 146 | ~cumulative_probability:(fun x -> cumulative_probability standard ~x) 147 | ~alternative:TwoSided () 148 | in begin 149 | print_endline "One-sample Kolmogorov-Smirnov test for goodness of fit"; 150 | print_float_array v; 151 | printf "D = %f, P-value: %f\n" d test_pvalue; 152 | print_newline () 153 | end 154 | 155 | let ks_two_sample () = 156 | let open Distributions.Normal in 157 | let v1 = sample ~size:10 standard in 158 | let v2 = sample ~size:10 standard in 159 | let { test_statistic = d; test_pvalue } = 160 | KolmogorovSmirnov.two_sample v1 v2 ~alternative:TwoSided () 161 | in begin 162 | print_endline "Two-sample Kolmogorov-Smirnov test"; 163 | print_float_array v1; 164 | print_float_array v2; 165 | printf "D = %f, P-value: %f\n" d test_pvalue; 166 | print_newline () 167 | end 168 | 169 | 170 | let adjust_bh () = 171 | let open Distributions.Beta in 172 | let pvalues = sample ~size:10 (create ~alpha:0.5 ~beta:0.5) in 173 | let adjusted_pvalues = Multiple.(adjust pvalues BenjaminiHochberg) in 174 | begin 175 | printf "Benjamini-Hochberg P-value adjustment\n"; 176 | print_float_array pvalues; 177 | print_float_array adjusted_pvalues; 178 | 
print_newline () 179 | end 180 | 181 | let adjust_hb () = 182 | let open Distributions.Beta in 183 | let pvalues = sample ~size:10 (create ~alpha:0.5 ~beta:0.5) in 184 | let adjusted_pvalues = Multiple.(adjust pvalues HolmBonferroni) in 185 | begin 186 | printf "Holm-Bonferroni P-value adjustment\n"; 187 | print_float_array pvalues; 188 | print_float_array adjusted_pvalues; 189 | print_newline () 190 | end 191 | 192 | 193 | let () = begin 194 | t_test_one_sample (); 195 | t_test_two_sample_independent (); 196 | t_test_two_sample_paired (); 197 | chisq_test_gof (); 198 | chisq_test_independence (); 199 | mann_whitney_wilcoxon (); 200 | wilcoxon_signed_rank_one_sample (); 201 | wilcoxon_signed_rank_paired (); 202 | sign_one_sample (); 203 | sign_paired (); 204 | ks_gof (); 205 | ks_two_sample (); 206 | 207 | adjust_bh (); 208 | adjust_hb () 209 | end 210 | -------------------------------------------------------------------------------- /lib/API.odocl: -------------------------------------------------------------------------------- 1 | # OASIS_START 2 | # DO NOT EDIT (digest: e7dcb58f1d0a662ba5a939df4901999e) 3 | Base 4 | Distributions 5 | Resampling 6 | Sample 7 | Tests 8 | # OASIS_STOP 9 | -------------------------------------------------------------------------------- /lib/META: -------------------------------------------------------------------------------- 1 | # OASIS_START 2 | # DO NOT EDIT (digest: e6a492f67e9c0f9953d83800a3a8effd) 3 | version = "0.3" 4 | description = "GSL powered OCaml statistics library" 5 | requires = "gsl" 6 | archive(byte) = "pareto.cma" 7 | archive(byte, plugin) = "pareto.cma" 8 | archive(native) = "pareto.cmxa" 9 | archive(native, plugin) = "pareto.cmxs" 10 | exists_if = "pareto.cma" 11 | # OASIS_STOP 12 | 13 | -------------------------------------------------------------------------------- /lib/base.ml: -------------------------------------------------------------------------------- 1 | module Randist = Gsl.Randist 2 | 3 | open 
Internal 4 | 5 | 6 | let range ?(a=0) ~b = 7 | if b <= a 8 | then [||] 9 | else 10 | let vs = Array.make (b - a) 0 in begin 11 | for i = a to b - 1 do 12 | Array.unsafe_set vs (i - a) i 13 | done; vs 14 | end 15 | 16 | 17 | let cumulative ~f = function 18 | | [||] -> [||] 19 | | xs -> 20 | let n = Array.length xs in 21 | let acc = Array.make n (Array.unsafe_get xs 0) in 22 | for i = 1 to n - 1 do 23 | Array.unsafe_set acc i 24 | (f (Array.unsafe_get acc (i - 1)) (Array.unsafe_get xs i)) 25 | done; acc 26 | 27 | 28 | 29 | let reorder is ~src ~dst = 30 | let n = Array.length src in 31 | for i = 0 to n - 1 do 32 | let j = Array.unsafe_get is i in 33 | Array.unsafe_set dst i (Array.unsafe_get src j) 34 | done 35 | 36 | 37 | let search_sorted ~cmp vs v = 38 | let rec loop l r = 39 | (* We're looking for [v] in a semiclosed interval [l, r). 40 | Invariants: mid < r, 41 | 0 < l <= r. *) 42 | if l >= r 43 | then None 44 | else 45 | let mid = (r - l) / 2 + l in 46 | let res = cmp v (Array.unsafe_get vs mid) in 47 | if res = 0 48 | then Some mid 49 | else if res < 0 50 | then loop l mid 51 | else loop (mid + 1) r 52 | in loop 0 (Array.length vs) 53 | 54 | 55 | let shuffle ?(rng=default_rng) vs = 56 | let svs = Array.copy vs in begin 57 | Randist.shuffle rng svs; 58 | svs 59 | end 60 | 61 | and sample ?(rng=default_rng) ?(replace=false) ?size vs = 62 | let dst = match size with 63 | | Some n -> 64 | if vs = [||] || not replace && Array.length vs < n 65 | then invalid_arg "Base.sample: not enough elements to sample from" 66 | else Array.make n vs.(0) 67 | | None -> Array.copy vs 68 | in begin 69 | if replace 70 | then Randist.sample rng ~src:vs ~dst 71 | else Randist.choose rng ~src:vs ~dst; 72 | dst 73 | end 74 | -------------------------------------------------------------------------------- /lib/base.mli: -------------------------------------------------------------------------------- 1 | (** Tutti-frutti statistical functions. 
*) 2 | 3 | (** {e O(n)} Calculates a cumulative statistic over a given array. *) 4 | val cumulative : f:('a -> 'a -> 'a) -> 'a array -> 'a array 5 | 6 | 7 | (** {e O(log n)} Searches for the index of a given element [v] in array 8 | [vs], sorted with a given comparison function [cmp]. *) 9 | val search_sorted : cmp:('a -> 'a -> int) -> 'a array -> 'a -> int option 10 | 11 | 12 | (** {e O(n)} Reorders values in [src] into [dst], according to a given 13 | permutation of indices. *) 14 | val reorder : int array -> src:'a array -> dst:'a array -> unit 15 | 16 | 17 | (** Creates an array of integers given a semiopen range [\[a, b)]. *) 18 | val range : ?a:int -> b:int -> int array 19 | 20 | 21 | (** {e O(n)} Shuffles a given array using Fisher-Yates shuffle. *) 22 | val shuffle : ?rng:Gsl.Rng.t -> 'a array -> 'a array 23 | 24 | (** {e O(n)} Takes a sample of the specified [size] from the given 25 | array either with or without replacement. [size] defaults to the 26 | whole array. *) 27 | val sample 28 | : ?rng:Gsl.Rng.t -> ?replace:bool -> ?size:int -> 'a array -> 'a array 29 | -------------------------------------------------------------------------------- /lib/distributions.ml: -------------------------------------------------------------------------------- 1 | open Internal 2 | 3 | module Randist = Gsl.Randist 4 | module Rng = Gsl.Rng 5 | 6 | module Features = struct 7 | module type S = sig 8 | type elt 9 | type t 10 | 11 | val mean : t -> elt 12 | val variance : t -> elt 13 | val skewness : t -> elt 14 | val kurtosis : t -> elt 15 | end 16 | 17 | module type Opt = sig 18 | type elt 19 | type t 20 | 21 | val mean_opt : t -> elt option 22 | val variance_opt : t -> elt option 23 | val skewness_opt : t -> elt option 24 | val kurtosis_opt : t -> elt option 25 | end 26 | end 27 | 28 | module type BaseDistribution = sig 29 | type elt 30 | type t 31 | 32 | val random : ?rng:Rng.t -> t -> elt 33 | val sample : ?rng:Rng.t -> size:int -> t -> elt array 34 | end 35 | 36 | 
module type DiscreteDistribution = sig 37 | include BaseDistribution 38 | 39 | val cumulative_probability : t -> k:elt -> float 40 | 41 | val probability : t -> k:elt -> float 42 | val log_probability : t -> k:elt -> float 43 | end 44 | 45 | module type ContinuousDistribution = sig 46 | include BaseDistribution 47 | 48 | val cumulative_probability : t -> x:elt -> float 49 | 50 | val density : t -> x:elt -> float 51 | val log_density : t -> x:elt -> float 52 | 53 | val quantile : t -> p:float -> elt 54 | end 55 | 56 | 57 | let make_sampler random ?rng ~size d = 58 | let init = random ?rng d in 59 | let vs = Array.make size init in begin 60 | for i = 1 to size - 1 do 61 | Array.unsafe_set vs i (random ?rng d) 62 | done; vs 63 | end 64 | 65 | 66 | module Normal = struct 67 | type elt = float 68 | type t = { 69 | normal_mean : float; 70 | normal_sd : float 71 | } 72 | 73 | let create ~mean ~sd = 74 | if sd > 0. 75 | then { normal_mean = mean; normal_sd = sd } 76 | else invalid_arg "Normal.create: standard deviation must be positive" 77 | 78 | let standard = create ~mean:0. ~sd:1. 79 | 80 | let cumulative_probability { normal_mean; normal_sd } ~x = 81 | Gsl.Cdf.gaussian_P ~sigma:normal_sd ~x:(x -. normal_mean) 82 | 83 | let density { normal_mean; normal_sd } ~x = 84 | Randist.gaussian_pdf ~sigma:normal_sd (x -. normal_mean) 85 | and log_density { normal_mean; normal_sd } ~x = 86 | let z = (x -. normal_mean) /. normal_sd in 87 | -0.5 *. z *. z -. (log normal_sd +. 0.5 *. Gsl.Math.(ln2 +. lnpi)) 88 | 89 | let quantile { normal_mean; normal_sd } ~p = 90 | if p < 0. || p > 1. 91 | then invalid_arg "Normal.quantile: p must be in range [0, 1]" 92 | else Gsl.Cdf.gaussian_Pinv ~sigma:normal_sd ~p +. normal_mean 93 | 94 | let mean { normal_mean; _ } = normal_mean 95 | and variance { normal_sd; _ } = sqr normal_sd 96 | and skewness _d = 0. 97 | and kurtosis _d = 0. 98 | 99 | let random ?(rng=default_rng) { normal_mean; normal_sd } = 100 | Randist.gaussian ~sigma:normal_sd rng +. 
normal_mean
  let sample = make_sampler random

  (* MLE for the normal distribution: the sample mean and the sample
     standard deviation are the closed-form maximum-likelihood
     estimates. *)
  let mle vs =
    let mean = Sample.mean vs in
    let sd = Sample.sd ~mean vs in
    create ~mean ~sd
end

(* The log-normal distribution: [X] is log-normally distributed iff
   [log X] is normal with mean [lognormal_mean] and standard deviation
   [lognormal_sd]. *)
module LogNormal = struct
  type elt = float
  type t = {
    lognormal_mean : float;
    lognormal_sd : float
  }

  let create ~mean ~sd =
    if sd > 0.
    then { lognormal_mean = mean; lognormal_sd = sd }
    else invalid_arg "LogNormal.create: standard deviation must be positive"

  let cumulative_probability { lognormal_mean; lognormal_sd } ~x =
    Gsl.Cdf.lognormal_P ~zeta:lognormal_mean ~sigma:lognormal_sd ~x

  let density { lognormal_mean; lognormal_sd } ~x =
    Randist.lognormal_pdf ~zeta:lognormal_mean ~sigma:lognormal_sd x
  and log_density { lognormal_mean; lognormal_sd } ~x =
    (* The density is zero for non-positive [x]. *)
    if x <= 0.
    then neg_infinity
    else
      let z = (log x -. lognormal_mean) /. lognormal_sd in
      -. log x -. Gsl.Math.(ln2 +. lnpi) /. 2. -.
        log lognormal_sd -. z *. z /. 2.

  let quantile { lognormal_mean; lognormal_sd } ~p =
    if p < 0. || p > 1.
    then invalid_arg "LogNormal.quantile: p must be in range [0, 1]"
    else Gsl.Cdf.lognormal_Pinv ~zeta:lognormal_mean ~sigma:lognormal_sd ~p

  (* Note: the mean of a log-normal is [exp (mu + sigma^2 / 2)], *not*
     [exp (mu + sigma^2)]. *)
  let mean { lognormal_mean; lognormal_sd } =
    exp (lognormal_mean +. sqr lognormal_sd /. 2.)
  and variance { lognormal_mean; lognormal_sd } =
    (exp (sqr lognormal_sd) -. 1.) *.
      exp (2. *. lognormal_mean +. sqr lognormal_sd)
  (* Skewness is [(exp sd2 + 2) * sqrt (exp sd2 - 1)]; the first factor
     *adds* 2. *)
  and skewness { lognormal_sd; _ } =
    let sd2 = sqr lognormal_sd in (exp sd2 +. 2.) *. sqrt (exp sd2 -. 1.)
  and kurtosis { lognormal_sd; _ } =
    let sd2 = sqr lognormal_sd in
    exp (4. *. sd2) +. 2. *. exp (3. *. sd2) +. 3. *. exp (2. *. sd2) -. 6.
149 | 150 | let random ?(rng=default_rng) { lognormal_mean; lognormal_sd } = 151 | Randist.lognormal ~zeta:lognormal_mean ~sigma:lognormal_sd rng 152 | let sample = make_sampler random 153 | 154 | let mle vs = 155 | let log_vs = Array.map ~f:log vs in 156 | let mean = Sample.mean log_vs in 157 | let sd = Sample.sd ~mean:mean log_vs in 158 | create ~mean ~sd 159 | end 160 | 161 | module Uniform = struct 162 | type elt = float 163 | type t = { 164 | uniform_lower : float; 165 | uniform_upper : float 166 | } 167 | 168 | let create ~lower ~upper = 169 | if lower > upper 170 | then { uniform_lower = upper; uniform_upper = lower } 171 | else { uniform_lower = lower; uniform_upper = upper } 172 | 173 | let cumulative_probability { uniform_lower; uniform_upper } = 174 | Gsl.Cdf.flat_P ~a:uniform_lower ~b:uniform_upper 175 | 176 | let density { uniform_lower; uniform_upper } ~x = 177 | Randist.flat_pdf ~a:uniform_lower ~b:uniform_upper x 178 | and log_density { uniform_lower; uniform_upper } ~x = 179 | if x < uniform_lower || x > uniform_upper 180 | then neg_infinity 181 | else -. log (uniform_upper -. uniform_lower) 182 | 183 | let quantile { uniform_lower; uniform_upper } ~p = 184 | if p < 0. || p > 1. 185 | then invalid_arg "Uniform.quantile: p must be in range [0, 1]" 186 | else Gsl.Cdf.flat_Pinv ~a:uniform_lower ~b:uniform_upper ~p 187 | 188 | let mean { uniform_lower; uniform_upper } = 189 | 0.5 *. (uniform_lower +. uniform_upper) 190 | and variance { uniform_lower; uniform_upper } = 191 | sqr (uniform_upper -. uniform_lower) /. 12. 192 | and skewness _d = 0. 193 | and kurtosis _d = -6. /. 5. 
194 | 195 | let random ?(rng=default_rng) { uniform_lower; uniform_upper } = 196 | Randist.flat ~a:uniform_lower ~b:uniform_upper rng 197 | let sample = make_sampler random 198 | 199 | let mle vs = create ~lower:(Sample.min vs) ~upper:(Sample.max vs) 200 | end 201 | 202 | module Exponential = struct 203 | type elt = float 204 | type t = { exp_scale : float } 205 | 206 | let create ~scale = 207 | if scale > 0. 208 | then { exp_scale = scale } 209 | else invalid_arg "Exponential.create: scale must be positive" 210 | 211 | let cumulative_probability { exp_scale } = Gsl.Cdf.exponential_P ~mu:exp_scale 212 | 213 | let density { exp_scale } ~x = 214 | Randist.exponential_pdf ~mu:exp_scale x 215 | and log_density { exp_scale } ~x = 216 | if x < 0. 217 | then neg_infinity 218 | else -. x /. exp_scale -. log exp_scale 219 | 220 | let quantile { exp_scale } ~p = 221 | if p < 0. || p > 1. 222 | then invalid_arg "Exponential.quantile: p must be in range [0, 1]" 223 | else Gsl.Cdf.exponential_Pinv ~mu:exp_scale ~p 224 | 225 | let mean { exp_scale } = exp_scale 226 | and variance { exp_scale } = sqrt exp_scale 227 | and skewness _d = 2. 228 | and kurtosis _d = 6. 229 | 230 | let random ?(rng=default_rng) { exp_scale } = 231 | Randist.exponential ~mu:exp_scale rng 232 | let sample = make_sampler random 233 | 234 | let mle vs = create ~scale:(Sample.mean vs) 235 | end 236 | 237 | module ChiSquared = struct 238 | type elt = float 239 | type t = { chisq_df : float } 240 | 241 | let create ~df = 242 | if df <= 0 243 | then invalid_arg "ChiSquared.create: degrees of freedom must be non negative" 244 | else { chisq_df = float_of_int df } 245 | 246 | let cumulative_probability { chisq_df } = Gsl.Cdf.chisq_P ~nu:chisq_df 247 | 248 | let density { chisq_df } ~x = Randist.chisq_pdf ~nu:chisq_df x 249 | and log_density { chisq_df } ~x = 250 | if x < 0. 251 | then neg_infinity 252 | else 253 | let k2 = chisq_df /. 2. in 254 | -. k2 *. Gsl.Math.ln2 -. Gsl.Sf.lngamma k2 +. 255 | (k2 -. 1.) *. 
log x -. x /. 2. 256 | 257 | let quantile { chisq_df } ~p = 258 | if p < 0. || p > 1. 259 | then invalid_arg "ChiSquared.quantile: p must be in range [0, 1]" 260 | else Gsl.Cdf.chisq_Pinv ~nu:chisq_df ~p 261 | 262 | let mean { chisq_df } = chisq_df 263 | and variance { chisq_df } = 2. *. chisq_df 264 | and skewness { chisq_df } = sqrt (8. /. chisq_df) 265 | and kurtosis { chisq_df } = 12. /. chisq_df 266 | 267 | let random ?(rng=default_rng) { chisq_df } = 268 | Randist.chisq ~nu:chisq_df rng 269 | let sample = make_sampler random 270 | 271 | let mme vs = create ~df:(round (Sample.mean vs)) 272 | end 273 | 274 | module F = struct 275 | type elt = float 276 | type t = { 277 | f_df1 : float; 278 | f_df2 : float 279 | } 280 | 281 | let create ~df1 ~df2 = 282 | if df1 <= 0 || df2 <= 0 283 | then invalid_arg "F.create: degrees of freedom must be non negative" 284 | else { f_df1 = float_of_int df1; f_df2 = float_of_int df2 } 285 | 286 | let cumulative_probability { f_df1; f_df2 } = 287 | Gsl.Cdf.fdist_P ~nu1:f_df1 ~nu2:f_df2 288 | 289 | let density { f_df1; f_df2 } ~x = 290 | Randist.fdist_pdf ~nu1:f_df1 ~nu2:f_df2 x 291 | and log_density { f_df1; f_df2; } ~x = 292 | let f_df12 = f_df1 /. 2. 293 | and f_df22 = f_df2 /. 2. 294 | in f_df12 *. (log f_df1 +. log x) +. f_df22 *. log f_df2 -. 295 | (f_df12 +. f_df22) *. log (f_df1 *. x +. f_df2) -. 296 | log x -. Gsl.Sf.lnbeta f_df12 f_df22 297 | 298 | let quantile { f_df1; f_df2 } ~p = 299 | if p < 0. || p > 1. 300 | then invalid_arg "F.quantile: p must be in range [0, 1]" 301 | else Gsl.Cdf.fdist_Pinv ~nu1:f_df1 ~nu2:f_df2 ~p 302 | 303 | let mean_opt { f_df2; _ } = 304 | if f_df2 <= 2. 305 | then None 306 | else Some (f_df2 /. (f_df2 -. 2.)) 307 | and variance_opt { f_df1; f_df2 } = 308 | if f_df2 <= 4. 309 | then None 310 | else Some (2. *. sqr f_df2 *. (f_df1 +. f_df2 -. 2.) /. 311 | (f_df1 *. sqr (f_df2 -. 2.) *. (f_df2 -. 4.))) 312 | and skewness_opt { f_df1; f_df2 } = 313 | if f_df2 <= 6. 
314 | then None 315 | else Some ((2. *. f_df1 +. f_df2 -. 2.) *. sqrt (8. *. (f_df2 -. 4.)) /. 316 | ((f_df2 -. 6.) *. sqrt (f_df1 *. (f_df1 +. f_df2 -. 2.)))) 317 | and kurtosis_opt { f_df1; f_df2 } = 318 | if f_df2 <= 8. 319 | then None 320 | else Some (12. *. 321 | (f_df1 *. (5. *. f_df2 -. 22.) *. (f_df1 +. f_df2 -. 2.) +. 322 | (f_df2 -. 4.) *. sqr (f_df2 -. 2.)) /. 323 | (f_df1 *. (f_df2 -. 6.) *. (f_df2 -. 8.) *. 324 | (f_df1 +. f_df2 -. 2.))) 325 | 326 | let random ?(rng=default_rng) { f_df1; f_df2 } = 327 | Randist.fdist ~nu1:f_df1 ~nu2:f_df2 rng 328 | let sample = make_sampler random 329 | 330 | let mme vs = 331 | let mean = Sample.mean vs in 332 | let variance = Sample.variance ~mean vs in 333 | let df1 = 334 | -2 * round (sqr mean /. (sqr mean *. (mean -. 1.) +. 335 | mean *. variance -. 2. *. variance)) 336 | and df2 = 2 * round (mean /. (mean -. 1.)) in 337 | create ~df1 ~df2 338 | end 339 | 340 | module T = struct 341 | type elt = float 342 | type t = { t_df : float } 343 | 344 | let create ~df = 345 | if df <= 0. 346 | then invalid_arg "T.create: degrees of freedom must be non negative" 347 | else { t_df = df } 348 | 349 | let cumulative_probability { t_df } = Gsl.Cdf.tdist_P ~nu:t_df 350 | 351 | let density { t_df } ~x = Randist.tdist_pdf ~nu:t_df x 352 | and log_density { t_df } ~x = 353 | Gsl.Sf.lngamma ((t_df +. 1.) /. 2.) -. 354 | (log t_df +. Gsl.Math.lnpi) /. 2. -. 355 | Gsl.Sf.lngamma (t_df /. 2.) -. 356 | (t_df +. 1.) /. 2. *. Gsl.Sf.log_1plusx (x *. x /. t_df) 357 | 358 | let quantile { t_df } ~p = 359 | if p < 0. || p > 1. 360 | then invalid_arg "T.quantile: p must be in range [0, 1]" 361 | else Gsl.Cdf.tdist_Pinv ~nu:t_df ~p 362 | 363 | let mean_opt { t_df } = if t_df > 0. then Some 0. else None 364 | and variance_opt { t_df } = 365 | if t_df > 2. 366 | then Some (t_df /. (t_df -. 2.)) 367 | else if t_df > 1. 368 | then Some infinity 369 | else None 370 | and skewness_opt { t_df } = 371 | if t_df > 3. 372 | then Some 0. 
373 | else None 374 | and kurtosis_opt { t_df } = 375 | if t_df > 4. 376 | then Some (6. /. (t_df -. 4.)) 377 | else if t_df > 2. 378 | then Some infinity 379 | else None 380 | 381 | let random ?(rng=default_rng) { t_df } = Randist.tdist ~nu:t_df rng 382 | let sample = make_sampler random 383 | 384 | let mme vs = 385 | let variance = Sample.variance vs in 386 | if abs_float variance = infinity 387 | then invalid_arg "T.mme: infinite sample" 388 | else create ~df:(2. *. variance /. (variance -. 1.)) 389 | end 390 | 391 | module Gamma = struct 392 | type elt = float 393 | type t = { 394 | gamma_shape : float; 395 | gamma_scale : float 396 | } 397 | 398 | let create ~shape ~scale = 399 | if shape <= 0. 400 | then invalid_arg "Gamma.create: shape must be positive" 401 | else if scale <= 0. 402 | then invalid_arg "Gamma.create: scale must be positive" 403 | else { gamma_shape = shape; gamma_scale = scale } 404 | 405 | let cumulative_probability { gamma_shape; gamma_scale } = 406 | Gsl.Cdf.gamma_P ~a:gamma_shape ~b:gamma_scale 407 | 408 | let density { gamma_shape; gamma_scale } ~x = 409 | Randist.gamma_pdf ~a:gamma_shape ~b:gamma_scale x 410 | and log_density { gamma_shape; gamma_scale } ~ x = 411 | if x <= 0. 412 | then neg_infinity 413 | else 414 | -. Gsl.Sf.lngamma gamma_shape -. gamma_shape *. log gamma_scale +. 415 | (gamma_shape -. 1.) *. log x -. x /. gamma_scale 416 | 417 | let quantile { gamma_shape; gamma_scale } ~p = 418 | if p < 0. || p > 1. 419 | then invalid_arg "Gamma.quantile: p must be in range [0, 1]" 420 | else Gsl.Cdf.gamma_Pinv ~a:gamma_shape ~b:gamma_scale ~p 421 | 422 | let mean { gamma_shape; gamma_scale } = gamma_shape *. gamma_scale 423 | and variance { gamma_shape; gamma_scale } = gamma_shape *. sqr gamma_scale 424 | and skewness { gamma_shape; _ } = 2. /. sqrt gamma_shape 425 | and kurtosis { gamma_shape; _ } = 6. /. 
gamma_shape 426 | 427 | let random ?(rng=default_rng) { gamma_shape; gamma_scale } = 428 | Randist.gamma ~a:gamma_shape ~b:gamma_scale rng 429 | let sample = make_sampler random 430 | 431 | let mme vs = 432 | let mean = Sample.mean vs in 433 | let variance = Sample.variance ~mean vs in 434 | create ~shape:(sqr mean /. variance) ~scale:(mean /. variance) 435 | 436 | let mle ~n_iter ~epsilon vs = 437 | let log_mean = log (Sample.mean vs) 438 | and mean_log = Sample.mean (Array.map ~f:log vs) in 439 | let s = log_mean -. mean_log in 440 | 441 | let f logk = logk -. Gsl.Sf.psi (exp logk) -. s 442 | and df logk = 1. -. exp logk *. Gsl.Sf.psi_1 (exp logk) in 443 | 444 | let logk = find_root_newton 445 | ~n_iter ~epsilon 446 | ~init:(log (3. -. s +. sqrt (sqr (s -. 3.) +. 24. *. s)) -. 447 | log 12. -. log s) 448 | Gsl.Fun.({ f; df; fdf = fun logk -> (f logk, df logk) }) 449 | in create ~shape:(exp logk) ~scale:(Sample.mean vs /. exp logk) 450 | end 451 | 452 | module Cauchy = struct 453 | type elt = float 454 | type t = { 455 | cauchy_location : float; 456 | cauchy_scale : float 457 | } 458 | 459 | let create ~location ~scale = 460 | if scale <= 0. 461 | then invalid_arg "Cauchy.create: scale must be positive" 462 | else { cauchy_location = location; cauchy_scale = scale } 463 | 464 | let standard = create ~location:0. ~scale:1. 465 | 466 | let cumulative_probability { cauchy_location; cauchy_scale } ~x = 467 | Gsl.Cdf.cauchy_P ~a:cauchy_scale ~x:(x -. cauchy_location) 468 | 469 | let density { cauchy_location; cauchy_scale } ~x = 470 | Randist.cauchy_pdf ~a:cauchy_scale (x -. cauchy_location) 471 | and log_density { cauchy_location; cauchy_scale } ~x = 472 | let z = (x -. cauchy_location) /. cauchy_scale in 473 | -. (Gsl.Math.lnpi +. log cauchy_scale +. Gsl.Sf.log_1plusx (z *. z)) 474 | 475 | let quantile { cauchy_location; cauchy_scale } ~p = 476 | if p < 0. || p > 1. 
477 | then invalid_arg "Cauchy.quantile: p must be in range [0, 1]" 478 | else Gsl.Cdf.cauchy_Pinv ~a:cauchy_scale ~p +. cauchy_location 479 | 480 | let random ?(rng=default_rng) { cauchy_location; cauchy_scale } = 481 | Randist.cauchy ~a:cauchy_scale rng +. cauchy_location 482 | let sample = make_sampler random 483 | end 484 | 485 | module Beta = struct 486 | type elt = float 487 | type t = { 488 | beta_alpha : float; 489 | beta_beta : float 490 | } 491 | 492 | let create ~alpha ~beta = 493 | if alpha <= 0. || beta <= 0. 494 | then invalid_arg "Beta.create: shape parameters must be positive" 495 | else { beta_alpha = alpha; beta_beta = beta } 496 | 497 | let cumulative_probability { beta_alpha; beta_beta } = 498 | Gsl.Cdf.beta_P ~a:beta_alpha ~b:beta_beta 499 | 500 | let density { beta_alpha; beta_beta } ~x = 501 | Randist.beta_pdf ~a:beta_alpha ~b:beta_beta x 502 | and log_density { beta_alpha; beta_beta } ~x = 503 | if x < 0. || x > 1. 504 | then neg_infinity 505 | else 506 | (beta_alpha -. 1.) *. log x +. (beta_beta -. 1.) *. log (1. -. x) -. 507 | Gsl.Sf.lnbeta beta_alpha beta_beta 508 | 509 | let quantile { beta_alpha; beta_beta } ~p = 510 | if p < 0. || p > 1. 511 | then invalid_arg "Beta.quantile: p must be in range [0, 1]" 512 | else Gsl.Cdf.beta_Pinv ~a:beta_alpha ~b:beta_beta ~p 513 | 514 | let mean { beta_alpha = a; beta_beta = b } = a /. (a +. b) 515 | and variance { beta_alpha = a; beta_beta = b} = 516 | a *. b /. (sqr (a +. b) *. (a +. b +. 1.)) 517 | and skewness { beta_alpha = a; beta_beta = b } = 518 | 2. *. (b -. a) *. sqrt (a +. b +. 1.) /. ((a +. b +. 2.) *. sqrt (a *. b)) 519 | and kurtosis { beta_alpha = a; beta_beta = b } = 520 | 6. *. (sqr (a -. b) *. (a +. b +. 1.) -. a *. b *. (a +. b +. 2.)) /. 521 | (a *. b *. (a +. b +. 2.) *. (a +. b +. 
3.))

  let random ?(rng=default_rng) { beta_alpha; beta_beta } =
    Randist.beta ~a:beta_alpha ~b:beta_beta rng
  let sample = make_sampler random

  (* Method of moments: alpha = m (m (1 - m) / v - 1) and
     beta = (1 - m) (m (1 - m) / v - 1), where [m] and [v] are the
     sample mean and variance.  Both shape parameters share a common
     factor, which is positive exactly when v < m (1 - m).  The
     previous version negated this factor for [alpha] (it used
     [m^2 - m + v] instead of [m - m^2 - v]), so [create] always
     rejected the resulting negative estimate; the [beta] expression is
     algebraically unchanged. *)
  let mme vs =
    let mean = Sample.mean vs in
    let variance = Sample.variance ~mean vs in
    let factor = mean *. (1. -. mean) /. variance -. 1. in
    create ~alpha:(mean *. factor) ~beta:((1. -. mean) *. factor)
end

(* The logistic distribution with the given location and scale. *)
module Logistic = struct
  type elt = float
  type t = {
    logistic_location : float;
    logistic_scale : float
  }

  let create ~location ~scale =
    if scale <= 0.
    then invalid_arg "Logistic.create: scale must be positive"
    else { logistic_location = location; logistic_scale = scale }

  let cumulative_probability { logistic_location; logistic_scale } ~x =
    Gsl.Cdf.logistic_P ~a:logistic_scale ~x:(x -. logistic_location)

  let density { logistic_location; logistic_scale } ~x =
    Randist.logistic_pdf ~a:logistic_scale (x -. logistic_location)
  and log_density { logistic_location; logistic_scale } ~x =
    let z = (x -. logistic_location) /. logistic_scale in
    -. z -. log logistic_scale -. 2. *. (Gsl.Sf.log_1plusx (exp (-. z)))

  let quantile { logistic_location; logistic_scale } ~p =
    if p < 0. || p > 1.
    then invalid_arg "Logistic.quantile: p must be in range [0, 1]"
    else Gsl.Cdf.logistic_Pinv ~a:logistic_scale ~p +. logistic_location

  let mean { logistic_location; _ } = logistic_location
  and variance { logistic_scale; _ } =
    sqr (logistic_scale *. Gsl.Math.pi) /. 3.
  and skewness _d = 0.
  and kurtosis _d = 1.2

  let random ?(rng=default_rng) { logistic_location; logistic_scale } =
    Randist.logistic ~a:logistic_scale rng +.
logistic_location 568 | let sample = make_sampler random 569 | 570 | let mme vs = 571 | let mean = Sample.mean vs in 572 | let variance = Sample.variance ~mean vs in 573 | let open Gsl.Math in 574 | create ~location:mean ~scale:(sqrt (3. *. variance) /. pi) 575 | end 576 | 577 | 578 | module Poisson = struct 579 | type elt = int 580 | type t = { poisson_rate : float } 581 | 582 | let create ~rate = 583 | if rate > 0. 584 | then { poisson_rate = rate } 585 | else invalid_arg "Poisson.create: rate must be positive" 586 | 587 | let cumulative_probability { poisson_rate } ~k = 588 | Gsl.Cdf.poisson_P ~mu:poisson_rate ~k 589 | 590 | let probability { poisson_rate } ~k = 591 | Randist.poisson_pdf ~mu:poisson_rate k 592 | and log_probability { poisson_rate } ~k = 593 | if k < 0 594 | then neg_infinity 595 | else 596 | float k *. log poisson_rate -. 597 | Gsl.Sf.lngamma (float (k + 1)) -. poisson_rate 598 | 599 | let mean { poisson_rate } = poisson_rate 600 | and variance { poisson_rate } = poisson_rate 601 | and skewness { poisson_rate } = 1. /. sqrt poisson_rate 602 | and kurtosis { poisson_rate } = 1. /. poisson_rate 603 | 604 | let random ?(rng=default_rng) { poisson_rate } = 605 | Randist.poisson ~mu:poisson_rate rng 606 | let sample = make_sampler random 607 | 608 | let mle vs = create ~rate:(Sample.mean (Array.map ~f:float_of_int vs)) 609 | end 610 | 611 | module Bernoulli = struct 612 | type elt = int 613 | type t = { bernoulli_p : float } 614 | 615 | let create ~p = 616 | if p > 1.0 || p < 0. 617 | then invalid_arg "Bernoulli.create: probability must be in range [0, 1]" 618 | else { bernoulli_p = p } 619 | 620 | let cumulative_probability { bernoulli_p = p } ~k = 621 | if k < 0 622 | then 0. 623 | else if k < 1 624 | then 1. -. p 625 | else 1. 626 | 627 | let probability { bernoulli_p = p } ~k = 628 | if k = 0 629 | then 1. -. p 630 | else if k = 1 631 | then p 632 | else 0. 
633 | let log_probability d ~k = log (probability d ~k) 634 | 635 | let mean { bernoulli_p = p } = p 636 | and variance { bernoulli_p = p } = p *. (1. -. p) 637 | and skewness { bernoulli_p = p } = 638 | (1. -. 2. *. p) /. sqrt (p *. (1. -. p)) 639 | and kurtosis { bernoulli_p = p } = 640 | (1. -. 6. *. p *. (1. -. p)) /. (p *. (1. -. p)) 641 | 642 | let random ?(rng=default_rng) { bernoulli_p } = 643 | Randist.bernoulli ~p:bernoulli_p rng 644 | let sample = make_sampler random 645 | 646 | let mle vs = 647 | let n = Array.length vs 648 | and y = Array.fold_left ~f:(+) ~init:0 vs in 649 | create ~p:(float_of_int y /. float_of_int n) 650 | end 651 | 652 | module Binomial = struct 653 | type elt = int 654 | type t = { 655 | binomial_trials : int; 656 | binomial_p : float 657 | } 658 | 659 | let create ~trials ~p = 660 | if trials < 0 661 | then invalid_arg "Binomial.create: number of trials must be non negative" 662 | else if p > 1.0 || p < 0. 663 | then invalid_arg "Binomial.create: probability must be in range [0, 1]" 664 | else { binomial_trials = trials; binomial_p = p } 665 | 666 | let cumulative_probability { binomial_trials = n; binomial_p = p } ~k = 667 | Gsl.Cdf.binomial_P ~n ~p ~k 668 | 669 | let probability { binomial_trials = n; binomial_p = p } ~k = 670 | Randist.binomial_pdf ~n ~p k 671 | and log_probability { binomial_trials = n; binomial_p = p } ~k = 672 | if k < 0 || k > n 673 | then neg_infinity 674 | else 675 | Gsl.Sf.lnchoose n k +. float k *. log p +. float (n - k) *. log (1. -. p) 676 | 677 | let mean { binomial_trials = n; binomial_p = p } = float_of_int n *. p 678 | and variance { binomial_trials = n; binomial_p = p } = 679 | float_of_int n *. p *. (1. -. p) 680 | and skewness { binomial_trials = n; binomial_p = p } = 681 | (1. -. 2. *. p) /. sqrt (float_of_int n *. p *. (1. -. p)) 682 | and kurtosis { binomial_trials = n; binomial_p = p } = 683 | (1. -. 6. *. p *. (1. -. p)) /. (float_of_int n *. p *. (1. -. 
p)) 684 | 685 | let random ?(rng=default_rng) { binomial_trials; binomial_p } = 686 | Randist.binomial ~n:binomial_trials ~p:binomial_p rng 687 | let sample = make_sampler random 688 | 689 | let mme vs = 690 | let vs = Array.map ~f:float_of_int vs in 691 | let mean = Sample.mean vs in 692 | let variance = Sample.variance ~mean vs in 693 | create ~trials:(round (sqr mean /. (mean -. variance))) 694 | ~p:(1. -. variance /. mean) 695 | end 696 | 697 | module Geometric = struct 698 | type elt = int 699 | type t = { geometric_p : float } 700 | 701 | let create ~p = 702 | if p > 1.0 || p <= 0. 703 | then invalid_arg "Geometric.create: probability must be in range (0, 1]" 704 | else { geometric_p = p } 705 | 706 | let cumulative_probability { geometric_p = p } ~k = 707 | Gsl.Cdf.geometric_P ~p ~k 708 | 709 | let probability { geometric_p = p } ~k = Randist.geometric_pdf ~p k 710 | and log_probability { geometric_p = p } ~k = 711 | if k <= 0 712 | then neg_infinity 713 | else float (k - 1) *. log (1. -. p) +. log p 714 | 715 | let mean { geometric_p } = 1. /. geometric_p 716 | and variance { geometric_p } = 717 | (1. -. geometric_p) /. sqr geometric_p 718 | and skewness { geometric_p } = 719 | (2. -. geometric_p) /. sqrt (1. -. geometric_p) 720 | and kurtosis { geometric_p } = 721 | 6. +. sqr geometric_p /. (1. -. geometric_p) 722 | 723 | let random ?(rng=default_rng) { geometric_p } = 724 | Randist.geometric ~p:geometric_p rng 725 | let sample = make_sampler random 726 | 727 | let mme vs = 728 | let n = Array.length vs in 729 | let p = float_of_int n /. 
730 | float_of_int (Array.fold_left ~f:(+) ~init:0 vs) 731 | in create ~p 732 | end 733 | 734 | module Hypergeometric = struct 735 | type elt = int 736 | type t = { 737 | hyper_m : int; 738 | hyper_t : int; 739 | hyper_k : int 740 | } 741 | 742 | let create ~m ~t ~k = 743 | if t < 0 744 | then invalid_arg "Hypergeometric.create: t must be non negative" 745 | else if m < 0 || m > t 746 | then invalid_arg "Hypergeometric.create: m must be in range [0, t]" 747 | else if k <= 0 || k > t 748 | then invalid_arg "Hypergeometric.create: k must be in range (0, t]" 749 | else { hyper_m = m; hyper_t = t; hyper_k = k } 750 | 751 | let cumulative_probability { hyper_m; hyper_t; hyper_k } ~k = 752 | Gsl.Cdf.hypergeometric_P ~n1:hyper_m ~n2:(hyper_t - hyper_m) ~t:hyper_k ~k 753 | 754 | let probability { hyper_m; hyper_t; hyper_k } ~k = 755 | Randist.hypergeometric_pdf ~n1:hyper_m ~n2:(hyper_t - hyper_m) ~t:hyper_k k 756 | and log_probability { hyper_m; hyper_t; hyper_k } ~k = 757 | if k < max 0 (hyper_k - hyper_t + hyper_m) || k > min hyper_m hyper_k 758 | then neg_infinity 759 | else 760 | Gsl.Sf.(lnchoose hyper_m k +. 761 | lnchoose (hyper_t - hyper_m) (hyper_k - k) -. 762 | lnchoose hyper_t hyper_k) 763 | 764 | let mean { hyper_m; hyper_t; hyper_k } = 765 | float_of_int (hyper_k * hyper_m) /. float_of_int hyper_t 766 | and variance { hyper_m; hyper_t; hyper_k } = 767 | let m = float_of_int hyper_m 768 | and t = float_of_int hyper_t 769 | and k = float_of_int hyper_k 770 | in (k *. m /. t) *. (1. -. m /. t) *. (t -. k) /. (t -. 1.) 771 | and skewness { hyper_m; hyper_t; hyper_k } = 772 | let m = float_of_int hyper_m 773 | and t = float_of_int hyper_t 774 | and k = float_of_int hyper_k 775 | in (t -. 2. *. m) *. sqrt (t -. 1.) *. (t -. 2. *. k) /. 776 | (sqrt (k *. m *. (t -. m) *. (t -. k)) *. (t -. 2.)) 777 | and kurtosis { hyper_m; hyper_t; hyper_k } = 778 | let m = float_of_int hyper_m 779 | and t = float_of_int hyper_t 780 | and k = float_of_int hyper_k 781 | in ((t -. 1.) 
*. sqr t *.
        (t *. (t +. 1.) -.
           6. *. m *. (t -. m) -.
           6. *. k *. (t -. k)) +.
        6. *. k *. m *. (t -. m) *. (t -. k) *. (5. *. t -. 6.)) /.
       (k *. m *. (t -. m) *. (t -. k) *. (t -. 2.) *. (t -. 3.))

  let random ?(rng=default_rng) { hyper_m; hyper_t; hyper_k } =
    Randist.hypergeometric ~n1:hyper_m ~n2:(hyper_t - hyper_m) ~t:hyper_k rng
  let sample = make_sampler random
end

(* The negative binomial distribution: the number of successes before
   [failures] failures, where each trial succeeds with probability [p].
   GSL uses the mirrored parameterization, hence [1. -. p] in the calls
   below. *)
module NegativeBinomial = struct
  type elt = int
  type t = {
    nbinomial_failures : float;
    nbinomial_p : float
  }

  let create ~failures ~p =
    if failures < 0.
    then invalid_arg ("NegativeBinomial.create: number of failures must " ^
                      "be non negative")
    else if p >= 1.0 || p <= 0.
    then invalid_arg "NegativeBinomial.create: probability must be in range (0, 1)"
    else { nbinomial_failures = failures; nbinomial_p = p }

  let cumulative_probability { nbinomial_failures = r; nbinomial_p = p } ~k =
    Gsl.Cdf.negative_binomial_P ~n:r ~p:(1. -. p) ~k

  let probability { nbinomial_failures = r; nbinomial_p = p } ~k =
    Randist.negative_binomial_pdf ~n:r ~p:(1. -. p) k
  and log_probability { nbinomial_failures = r; nbinomial_p = p } ~k =
    if k < 0
    then neg_infinity
    else
      Gsl.Sf.lngamma (float k +. r) -.
      Gsl.Sf.lngamma (float (k + 1)) -.
      Gsl.Sf.lngamma r +. float k *. log p +. r *. log (1. -. p)

  (* In this parameterization the mean is [r p / (1 - p)] and the
     variance is [r p / (1 - p)^2]: it *divides* by [sqr (1. -. p)],
     matching GSL's [n (1 - P) / P^2] with [P = 1. -. p]. *)
  let mean { nbinomial_failures = r; nbinomial_p = p } =
    r *. p /. (1. -. p)
  and variance { nbinomial_failures = r; nbinomial_p = p } =
    r *. p /. sqr (1. -. p)
  and skewness { nbinomial_failures = r; nbinomial_p = p } =
    (1. +. p) /. sqrt (r *. p)
  (* Excess kurtosis is [6 / r + (1 - p)^2 / (r p)] in this
     parameterization: the numerator is [sqr (1. -. p)], not
     [sqr (1. +. p)]. *)
  and kurtosis { nbinomial_failures = r; nbinomial_p = p } =
    6. /. r +. sqr (1. -. p) /. (r *.
p) 829 | 830 | let random ?(rng=default_rng) { nbinomial_failures; nbinomial_p } = 831 | Randist.negative_binomial ~n:nbinomial_failures ~p:(1. -. nbinomial_p) rng 832 | let sample = make_sampler random 833 | 834 | let mme vs = 835 | let vs = Array.map ~f:float_of_int vs in 836 | let mean = Sample.mean vs in 837 | let variance = Sample.variance ~mean vs in 838 | create 839 | ~failures:(sqr mean /. (variance -. mean)) 840 | ~p:(1. -. mean /. variance) 841 | end 842 | 843 | module Categorical = struct 844 | module type OrderedType = Map.OrderedType 845 | 846 | module type S = sig 847 | include DiscreteDistribution 848 | 849 | val create : (elt * float) array -> t 850 | 851 | val mle : elt array -> t 852 | end 853 | 854 | module Make (Elt : OrderedType) = struct 855 | type elt = Elt.t 856 | type t = { 857 | categorical_values : elt array; 858 | categorical_probs : float array; 859 | categorical_cumsum : Randist.discrete 860 | } 861 | 862 | let create dist = 863 | let n = Array.length dist 864 | and is = 865 | Array.sort_index dist 866 | ~cmp:(fun (v1, _p1) (v2, _p2) -> Elt.compare v1 v2) 867 | in if n = 0 then invalid_arg "Categorical.Make: no data"; 868 | 869 | let (v0, p0) = Array.(unsafe_get dist (unsafe_get is 0)) in 870 | let vs = Array.make n v0 871 | and probs = Array.make n p0 in begin 872 | for i = 1 to n - 1 do 873 | let (v, p) = Array.(unsafe_get dist (unsafe_get is i)) in 874 | Array.unsafe_set vs i v; 875 | Array.unsafe_set probs i p 876 | done; 877 | 878 | (* Note(superbobry): ideally, we should check that given 879 | probabilities sum up to 1., but I don't see how to do this 880 | for floating point numbers. 
*) 881 | { 882 | categorical_values = vs; 883 | categorical_probs = probs; 884 | categorical_cumsum = Randist.discrete_preproc probs 885 | } 886 | end 887 | 888 | let cumulative_probability { categorical_values; categorical_probs; _ } ~k = 889 | match Base.search_sorted ~cmp:Elt.compare categorical_values k with 890 | | Some pos -> 891 | let acc = ref 0. in begin 892 | for i = 0 to pos do 893 | acc := !acc +. Array.unsafe_get categorical_probs i 894 | done 895 | end; !acc 896 | | None -> 897 | if Elt.compare k (Array.unsafe_get categorical_values 0) < 0 898 | then 0. 899 | else 900 | (* Then it must be the case that forall vs : v > n. *) 901 | 1. 902 | 903 | let probability { categorical_values; categorical_probs; _ } ~k = 904 | match Base.search_sorted ~cmp:Elt.compare categorical_values k with 905 | | Some pos -> Array.unsafe_get categorical_probs pos 906 | | None -> 0. 907 | let log_probability d ~k = log (probability d ~k) 908 | 909 | let random ?(rng=default_rng) 910 | { categorical_values; categorical_cumsum; _ } = 911 | let pos = Randist.discrete rng categorical_cumsum in 912 | Array.unsafe_get categorical_values pos 913 | let sample = make_sampler random 914 | 915 | let mle vs = 916 | let n = Array.length vs in 917 | if n = 0 918 | then invalid_arg "Categorical.Make.mle: no data" 919 | else 920 | let counts = Hashtbl.create 8 in begin 921 | for i = 0 to n - 1 do 922 | let v = Array.unsafe_get vs i in 923 | if Hashtbl.mem counts v 924 | then Hashtbl.replace counts v (Hashtbl.find counts v + 1) 925 | else Hashtbl.add counts v 1 926 | done; 927 | 928 | let dist = Hashtbl.fold (fun k v dist -> 929 | ((k, float_of_int v /. 
float_of_int n) :: dist)) counts [] 930 | in create (Array.of_list dist) 931 | end 932 | end 933 | end 934 | 935 | 936 | let normal = Normal.create 937 | and log_normal = LogNormal.create 938 | and uniform = Uniform.create 939 | and exponential = Exponential.create 940 | and chi_squared = ChiSquared.create 941 | and f = F.create 942 | and t = T.create 943 | and gamma = Gamma.create 944 | and cauchy = Cauchy.create 945 | and beta = Beta.create 946 | and logistic = Logistic.create 947 | 948 | let poisson = Poisson.create 949 | and bernoulli = Bernoulli.create 950 | and binomial = Binomial.create 951 | and geometric = Geometric.create 952 | and hypergeometric = Hypergeometric.create 953 | and negative_binomial = NegativeBinomial.create 954 | -------------------------------------------------------------------------------- /lib/distributions.mli: -------------------------------------------------------------------------------- 1 | (** Commonly used probability distributions. *) 2 | 3 | module Features : sig 4 | (** Distribution features. *) 5 | module type S = sig 6 | type elt 7 | type t 8 | 9 | val mean : t -> elt 10 | val variance : t -> elt 11 | val skewness : t -> elt 12 | val kurtosis : t -> elt 13 | end 14 | 15 | (** Distribution features, which are allowed to be undefined for some 16 | combinations of distribution parameters. *) 17 | module type Opt = sig 18 | type elt 19 | type t 20 | 21 | val mean_opt : t -> elt option 22 | val variance_opt : t -> elt option 23 | val skewness_opt : t -> elt option 24 | val kurtosis_opt : t -> elt option 25 | end 26 | end 27 | 28 | module type DiscreteDistribution = sig 29 | type elt 30 | type t 31 | 32 | (** Samples a single point from the distribution. *) 33 | val random : ?rng:Gsl.Rng.t -> t -> elt 34 | 35 | (** Samples [size] data points from the distribution. *) 36 | val sample : ?rng:Gsl.Rng.t -> size:int -> t -> elt array 37 | 38 | (** Computes cumulative probability function for a given value [k], 39 | i. e. 
[P(X <= k)], the probability that a random variable [X] will 40 | be found at a value less than or equal to [k]. *) 41 | val cumulative_probability : t -> k:elt -> float 42 | 43 | (** Computes probability mass function for a given value [k], i. e. 44 | [P(X = k)], the probability that a random variable [X] is 45 | {b exactly} equal to [k] *) 46 | val probability : t -> k:elt -> float 47 | 48 | (** Computes natural logarithm of the probability mass function for 49 | a given value [k]. *) 50 | val log_probability : t -> k:elt -> float 51 | end 52 | 53 | module type ContinuousDistribution = sig 54 | type elt 55 | type t 56 | 57 | (** Samples a single point from the distribution. *) 58 | val random : ?rng:Gsl.Rng.t -> t -> elt 59 | 60 | (** Samples [size] data points from the distribution. *) 61 | val sample : ?rng:Gsl.Rng.t -> size:int -> t -> elt array 62 | 63 | (** Computes cumulative probability function for a given value [n], 64 | i. e. [P(X <= n)], the probability that a random variable [X] will 65 | be found at a value less than or equal to [n]. *) 66 | val cumulative_probability : t -> x:elt -> float 67 | 68 | (** Computes probability density function for a given value [n], i. e. 69 | [P(X = n)], the probability that a random variable [X] is 70 | {b exactly} equal to [n] *) 71 | val density : t -> x:elt -> float 72 | 73 | (** Computes natural logarithm of the probability density function for 74 | a given value [n]. *) 75 | val log_density : t -> x:elt -> float 76 | 77 | (** Computes inverse cumulative probability function for a given 78 | probability [p]. *) 79 | val quantile : t -> p:float -> elt 80 | end 81 | 82 | 83 | (** {2 Continuous distributions} *) 84 | 85 | (** The normal distribution. 
*) 86 | module Normal : sig 87 | type t = { 88 | normal_mean : float; 89 | normal_sd : float 90 | } 91 | 92 | include ContinuousDistribution with type t := t and type elt = float 93 | include Features.S with type t := t and type elt := float 94 | 95 | (** Creates normal distribution from parameters. *) 96 | val create : mean:float -> sd:float -> t 97 | 98 | (** Standard normal distribution with 0 [mean] and [sd] equal to 1. *) 99 | val standard : t 100 | 101 | (** Creates normal distribution with a MLE of parameters, estimated 102 | from given data. *) 103 | val mle : float array -> t 104 | end 105 | 106 | (** The log-normal distribution. *) 107 | module LogNormal : sig 108 | type t = { 109 | lognormal_mean : float; 110 | lognormal_sd : float 111 | } 112 | 113 | include ContinuousDistribution with type t := t and type elt = float 114 | include Features.S with type t := t and type elt := float 115 | 116 | (** Creates log-normal distribution from parameters. *) 117 | val create : mean:float -> sd:float -> t 118 | 119 | (** Creates log-normal distribution with a MLE of parameters, estimated 120 | from given data. *) 121 | val mle : float array -> t 122 | end 123 | 124 | (** Random variate distributed uniformly in the interval. *) 125 | module Uniform : sig 126 | type t = { 127 | uniform_lower : float; 128 | uniform_upper : float 129 | } 130 | 131 | include ContinuousDistribution with type t := t and type elt = float 132 | include Features.S with type t := t and type elt := float 133 | 134 | (** Creates uniform distribution over a given interval. *) 135 | val create : lower:float -> upper:float -> t 136 | 137 | (** Creates uniform distribution with a MLE of parameters, estimated 138 | from given data. *) 139 | val mle : float array -> t 140 | end 141 | 142 | (** The exponential distribution. 
143 | 144 | The probability distribution of the times between events in a Poisson 145 | process, in which events occur continuously and independently at a 146 | constant average [rate]. *) 147 | module Exponential : sig 148 | type t = { exp_scale : float } 149 | 150 | include ContinuousDistribution with type t := t and type elt = float 151 | include Features.S with type t := t and type elt := float 152 | 153 | (** Creates exponential distribution. [scale] must be positive. *) 154 | val create : scale:float -> t 155 | 156 | (** Creates exponential distribution with a MLE of parameters, estimated 157 | from given data. *) 158 | val mle : float array -> t 159 | end 160 | 161 | (** The chi-squared distribution. 162 | 163 | The probability distribution of sum of squares of [df] independent 164 | standard normal distributions. *) 165 | module ChiSquared : sig 166 | type t = { chisq_df : float } 167 | 168 | include ContinuousDistribution with type t := t and type elt = float 169 | include Features.S with type t := t and type elt := float 170 | 171 | (** Creates chi-squared distribution. Number of degrees of freedom 172 | must be positive. *) 173 | val create : df:int -> t 174 | 175 | (** Creates chi-squared distribution with parameters, estimated with 176 | method of moments. *) 177 | val mme : float array -> t 178 | end 179 | 180 | (** Fisher-Snedecor distribution. *) 181 | module F : sig 182 | type t = { 183 | f_df1 : float; 184 | f_df2 : float 185 | } 186 | 187 | include ContinuousDistribution with type t := t and type elt = float 188 | include Features.Opt with type t := t and type elt := float 189 | 190 | (** Creates Fisher-Snedecor distribution with a given number of degrees 191 | of freedom. *) 192 | val create : df1:int -> df2:int -> t 193 | 194 | (** Creates Fisher-Snedecor distribution with parameters, estimated 195 | with method of moments. *) 196 | val mme : float array -> t 197 | end 198 | 199 | (** Student's t-distribution. 
*) 200 | module T : sig 201 | type t = { t_df : float } 202 | 203 | include ContinuousDistribution with type t := t and type elt = float 204 | include Features.Opt with type t := t and type elt := float 205 | 206 | (** Creates Student's t-distribution with a given number of degrees 207 | of freedom. *) 208 | val create : df:float -> t 209 | 210 | (** Creates Student's t-distribution with parameters, estimated with 211 | method of moments. *) 212 | val mme : float array -> t 213 | end 214 | 215 | (** The gamma distribution. *) 216 | module Gamma : sig 217 | type t = { 218 | gamma_shape : float; 219 | gamma_scale : float 220 | } 221 | 222 | include ContinuousDistribution with type t := t and type elt = float 223 | include Features.S with type t := t and type elt := float 224 | 225 | (** Creates gamma distribution. Both shape and scale must be positive. *) 226 | val create : shape:float -> scale:float -> t 227 | 228 | (** Creates gamma distribution with parameters, estimated with method 229 | of moments. *) 230 | val mme : float array -> t 231 | 232 | (** Creates gamma distribution with a MLE of parameters, estimated 233 | from given data. *) 234 | val mle : n_iter:int -> epsilon:float -> float array -> t 235 | end 236 | 237 | (** The Cauchy-Lorentz distribution. 238 | 239 | It doesn't have mean and variance. *) 240 | module Cauchy : sig 241 | type t = { 242 | cauchy_location : float; 243 | cauchy_scale : float 244 | } 245 | 246 | include ContinuousDistribution with type t := t and type elt = float 247 | 248 | (** Creates Cauchy-Lorentz distribution from parameters. *) 249 | val create : location:float -> scale:float -> t 250 | 251 | (** Cauchy-Lorentz distribution with 0 [location] and [scale] equal to 1. *) 252 | val standard : t 253 | end 254 | 255 | (** The beta distribution. 
*) 256 | module Beta : sig 257 | type t = { 258 | beta_alpha : float; 259 | beta_beta : float 260 | } 261 | 262 | include ContinuousDistribution with type t := t and type elt = float 263 | include Features.S with type t := t and type elt := float 264 | 265 | (** Creates beta distribution. Both shape parameters must be positive. *) 266 | val create : alpha:float -> beta:float -> t 267 | 268 | (** Creates beta distribution with parameters, estimated with method 269 | of moments. *) 270 | val mme : float array -> t 271 | end 272 | 273 | (** Logistic distribution. *) 274 | module Logistic : sig 275 | type t = { 276 | logistic_location : float; 277 | logistic_scale : float 278 | } 279 | 280 | include ContinuousDistribution with type t := t and type elt = float 281 | include Features.S with type t := t and type elt := float 282 | 283 | (** Creates logistic distribution. *) 284 | val create : location:float -> scale:float -> t 285 | 286 | (** Creates logistic distribution with parameters, estimated with method 287 | of moments. *) 288 | val mme : float array -> t 289 | end 290 | 291 | 292 | (** {2 Discrete distributions} *) 293 | 294 | (** The Poisson distribution. 295 | 296 | The probability distribution of a number of events occurring in a 297 | fixed interval if these events occur with a known average [rate], 298 | and occur independently from each other within that interval. *) 299 | module Poisson : sig 300 | type t = { poisson_rate : float } 301 | 302 | include DiscreteDistribution with type t := t and type elt = int 303 | include Features.S with type t := t and type elt := float 304 | 305 | (** Creates a Poisson distribution. [rate] must be positive. *) 306 | val create : rate:float -> t 307 | 308 | (** Creates a Poisson distribution with a MLE of parameters, estimated 309 | from given data. *) 310 | val mle : int array -> t 311 | end 312 | 313 | (** Bernoulli distribution. 
314 | 315 | The probability distribution, which takes value [1] with success 316 | probability [p] and value [0] with failure probability [1 - p]. *) 317 | module Bernoulli : sig 318 | type t = { bernoulli_p : float } 319 | 320 | include DiscreteDistribution with type t := t and type elt = int 321 | include Features.S with type t := t and type elt := float 322 | 323 | (** Creates Bernoulli distribution with given success probability [p]. *) 324 | val create : p:float -> t 325 | 326 | (** Creates a Bernoulli distribution with a MLE of parameters, estimated 327 | from given data. *) 328 | val mle : int array -> t 329 | end 330 | 331 | (** The binomial distribution. 332 | 333 | The probability distribution of the number of successes in a sequence 334 | of independent Bernoulli [trials]. *) 335 | module Binomial : sig 336 | type t = { 337 | binomial_trials : int; 338 | binomial_p : float 339 | } 340 | 341 | include DiscreteDistribution with type t := t and type elt = int 342 | include Features.S with type t := t and type elt := float 343 | 344 | (** Creates binomial distribution. Number of [trials] must be 345 | non-negative. *) 346 | val create : trials:int -> p:float -> t 347 | 348 | (** Creates binomial distribution with parameters, estimated with 349 | method of moments. *) 350 | val mme : int array -> t 351 | end 352 | 353 | (** The Geometric distribution. 354 | 355 | The probability distribution of the number of failures before the 356 | first success, supported on the set [[0, 1, ...]]. *) 357 | module Geometric : sig 358 | type t = { geometric_p : float } 359 | 360 | include DiscreteDistribution with type t := t and type elt = int 361 | include Features.S with type t := t and type elt := float 362 | 363 | (** Creates Geometric distribution with a given probability of success. *) 364 | val create : p:float -> t 365 | 366 | (** Creates Geometric distribution with parameters, estimated with 367 | method of moments. 
*)
368 |   val mme : int array -> t
369 | end
370 |
371 | (** The Hypergeometric distribution.
372 |
373 |     The probability distribution of obtaining [k] elements of "type 1"
374 |     in [t] samples from a population {b without} replacement, if the
375 |     population contains [m] elements of "type 1" and [t - m] elements
376 |     of "type 2". *)
377 | module Hypergeometric : sig
378 |   type t = {
379 |     hyper_m : int;
380 |     hyper_t : int;
381 |     hyper_k : int
382 |   }
383 |
384 |   include DiscreteDistribution with type t := t and type elt = int
385 |   include Features.S with type t := t and type elt := float
386 |
387 |   (** Creates Hypergeometric distribution. *)
388 |   val create : m:int -> t:int -> k:int -> t
389 | end
390 |
391 | (** Negative Binomial distribution.
392 |
393 |     The probability distribution of the number of successes in a sequence
394 |     of Bernoulli trials before a specified number of [failures] occurs. *)
395 | module NegativeBinomial : sig
396 |   type t = {
397 |     nbinomial_failures : float;
398 |     nbinomial_p : float
399 |   }
400 |
401 |   include DiscreteDistribution with type t := t and type elt = int
402 |   include Features.S with type t := t and type elt := float
403 |
404 |   (** Creates negative Binomial distribution with a given number of
405 |       failures and success probability. *)
406 |   val create : failures:float -> p:float -> t
407 |
408 |   (** Creates negative Binomial distribution with parameters, estimated
409 |       with method of moments. *)
410 |   val mme : int array -> t
411 | end
412 |
413 | module Categorical : sig
414 |   module type OrderedType = sig
415 |     type t
416 |
417 |     val compare : t -> t -> int
418 |   end
419 |
420 |   module type S = sig
421 |     include DiscreteDistribution
422 |
423 |     (** Creates a categorical distribution over values of type [elt],
424 |         where each value is given a probability, which defaults to [0]
425 |         for values not in the list.
*) 426 | val create : (elt * float) array -> t 427 | 428 | (** Creates a categorical distribution with a MLE of parameters, 429 | estimated from given data. *) 430 | val mle : elt array -> t 431 | end 432 | 433 | module Make 434 | : functor (Elt : Map.OrderedType) 435 | -> S with type elt = Elt.t 436 | end 437 | 438 | (** {2 Shortcuts for creating distributions} *) 439 | 440 | val normal : mean:float -> sd:float -> Normal.t 441 | val log_normal : mean:float -> sd:float -> LogNormal.t 442 | val uniform : lower:float -> upper:float -> Uniform.t 443 | val exponential : scale:float -> Exponential.t 444 | val chi_squared : df:int -> ChiSquared.t 445 | val f : df1:int -> df2:int -> F.t 446 | val t : df:float -> T.t 447 | val gamma : shape:float -> scale:float -> Gamma.t 448 | val cauchy : location:float -> scale:float -> Cauchy.t 449 | val beta : alpha:float -> beta:float -> Beta.t 450 | val logistic : location:float -> scale:float -> Logistic.t 451 | 452 | val poisson : rate:float -> Poisson.t 453 | val bernoulli : p:float -> Bernoulli.t 454 | val binomial : trials:int -> p:float -> Binomial.t 455 | val geometric : p:float -> Geometric.t 456 | val hypergeometric : m:int -> t:int -> k:int -> Hypergeometric.t 457 | val negative_binomial : failures:float -> p:float -> NegativeBinomial.t 458 | -------------------------------------------------------------------------------- /lib/internal.ml: -------------------------------------------------------------------------------- 1 | open StdLabels 2 | 3 | let default_rng = let open Gsl.Rng in 4 | env_setup (); 5 | make (default ()) 6 | 7 | let find_root_newton ~n_iter ~epsilon ~init gsl_fun = 8 | let open Gsl.Root.Polish in 9 | let solver = make NEWTON gsl_fun init in begin 10 | let counter = ref n_iter 11 | and r0 = ref nan 12 | and r1 = ref init in begin 13 | while !counter > 0 && 14 | (!r0 <> !r0 || abs_float (!r0 -. 
!r1) > epsilon) 15 | do 16 | iterate solver; 17 | decr counter; 18 | r0 := !r1; 19 | r1 := root solver 20 | done 21 | end; !r1 22 | end 23 | 24 | let sqr x = x *. x 25 | let cube x = x *. x *. x 26 | 27 | let is_nan (x : float) = x <> x 28 | let is_not_nan (x : float) = x = x 29 | 30 | let round x = int_of_float (floor (x +. 0.5)) 31 | 32 | 33 | module Array = struct 34 | include Array 35 | 36 | let sort_index ~cmp vs = 37 | let n = length vs in 38 | let order = Array.make n 0 in begin 39 | for i = 0 to n - 1 do 40 | Array.unsafe_set order i i 41 | done; 42 | 43 | sort ~cmp:(fun i j -> cmp (unsafe_get vs i) (unsafe_get vs j)) order; 44 | order 45 | end 46 | 47 | let count ~f = 48 | fold_left ~f:(fun acc v -> acc + if f v then 1 else 0) ~init:0 49 | 50 | let exists ~f vs = 51 | let rec loop i = 52 | if i < 0 53 | then false 54 | else f (unsafe_get vs i) || loop (pred i) 55 | in loop (length vs - 1) 56 | 57 | let for_all ~f vs = 58 | let rec loop i = 59 | if i < 0 60 | then true 61 | else f (unsafe_get vs i) && loop (pred i) 62 | in loop (length vs - 1) 63 | 64 | let partition ~f vs = 65 | let (l, r) = fold_left vs 66 | ~init:([], []) 67 | ~f:(fun (l, r) x -> if f x then (x :: l, r) else (l, x :: r)) 68 | in (Array.of_list l, Array.of_list r) 69 | end 70 | 71 | module Matrix_flat = struct 72 | include Gsl.Matrix_flat 73 | 74 | let exists m ~f = 75 | let (nrow, ncol) = dims m in 76 | let rec loop_columns i j = 77 | if j < 0 78 | then false 79 | else f (get m i j) || loop_columns i (pred j) 80 | and loop_rows i = 81 | if i < 0 82 | then false 83 | else loop_columns i (ncol - 1) || loop_rows (pred i) 84 | in loop_rows (nrow - 1) 85 | 86 | let map m ~f = 87 | let (nrow, ncol) = dims m in begin 88 | for i = 0 to nrow - 1 do 89 | for j = 0 to ncol - 1 do 90 | set m i j (f (get m i j)) 91 | done 92 | done 93 | end 94 | 95 | let row_sums m = 96 | let (nrow, ncol) = dims m in 97 | let res = create ~init:0. 
1 nrow in begin 98 | for i = 0 to nrow - 1 do 99 | for j = 0 to ncol - 1 do 100 | set res 0 i (get res 0 i +. get m i j) 101 | done 102 | done; res 103 | end 104 | and col_sums m = 105 | let (nrow, ncol) = dims m in 106 | let res = create ~init:0. 1 ncol in begin 107 | for j = 0 to ncol - 1 do 108 | for i = 0 to nrow - 1 do 109 | set res 0 j (get res 0 j +. get m i j) 110 | done 111 | done; res 112 | end 113 | 114 | let sum m = 115 | let (nrow, ncol) = dims m in 116 | let acc = ref 0. in begin 117 | for i = 0 to nrow - 1 do 118 | for j = 0 to ncol - 1 do 119 | acc := !acc +. get m i j 120 | done 121 | done; ! acc 122 | end 123 | 124 | let power m = 125 | let open Gsl.Blas_flat in 126 | let mul a b = 127 | assert (dims a = dims b); 128 | let (w, h) = dims a in 129 | let res = create ~init:0. w h in begin 130 | gemm ~ta:NoTrans ~tb:NoTrans ~alpha:1. ~beta:1. ~a ~b ~c:res; 131 | res 132 | end 133 | in 134 | 135 | let rec go m = function 136 | | 1 -> m 137 | | k -> 138 | let m2 = go m (k / 2) in 139 | let mm = mul m2 m2 in 140 | if k mod 2 = 0 141 | then mm 142 | else mul mm m 143 | in go m 144 | end 145 | -------------------------------------------------------------------------------- /lib/pareto.mldylib: -------------------------------------------------------------------------------- 1 | # OASIS_START 2 | # DO NOT EDIT (digest: b71122ac52aff447e1216d0df0528f81) 3 | Pareto 4 | # OASIS_STOP 5 | -------------------------------------------------------------------------------- /lib/pareto.mllib: -------------------------------------------------------------------------------- 1 | # OASIS_START 2 | # DO NOT EDIT (digest: b71122ac52aff447e1216d0df0528f81) 3 | Pareto 4 | # OASIS_STOP 5 | -------------------------------------------------------------------------------- /lib/pareto.mlpack: -------------------------------------------------------------------------------- 1 | # OASIS_START 2 | # DO NOT EDIT (digest: c9403007191710ce601dbcb0c2f6e00c) 3 | Base 4 | Distributions 5 | 
Resampling
6 | Sample
7 | Tests
8 | Internal
9 | # OASIS_STOP
10 |
--------------------------------------------------------------------------------
/lib/resampling.ml:
--------------------------------------------------------------------------------
1 | open Internal
2 |
3 |
4 | (* Leave-one-out resampling: applies [estimator] to each length (n - 1)
5 |    subsample of [vs]; returns [||] for empty input. *)
6 | let jackknife ~estimator vs =
7 |   let n = Array.length vs in
8 |   if n = 0
9 |   then [||]
10 |   else
11 |     let init = Array.unsafe_get vs 0 in
12 |     Array.init n ~f:(fun i ->
13 |       let holey = Array.make (n - 1) init in begin
14 |         Array.blit ~src:vs ~src_pos:0 ~dst:holey ~dst_pos:0 ~len:i;
15 |         Array.blit ~src:vs ~src_pos:(i + 1)
16 |           ~dst:holey ~dst_pos:i ~len:(n - i - 1);
17 |         estimator holey
18 |       end)
19 |
20 |
21 | (* Applies [estimator] to [n_iter] bootstrap (with-replacement) resamples
22 |    of [vs]. *)
23 | let resample ?rng ~estimator ~n_iter vs =
24 |   Array.init n_iter
25 |     ~f:(fun _i -> estimator (Base.sample ?rng ~replace:true vs))
26 |
27 |
28 | module Bootstrap = struct
29 |   type estimate = {
30 |     point : float;
31 |     lower_bound : float;
32 |     upper_bound : float;
33 |     confidence_level : float
34 |   }
35 |
36 |   (* Bias-corrected and accelerated bootstrap confidence interval:
37 |      bias correction comes from the fraction of bootstrap estimates
38 |      below the point estimate, acceleration from the jackknife
39 |      deviations.  Raises [Invalid_argument] unless
40 |      [confidence_level] is in (0, 1). *)
41 |   let bca ?rng ?(confidence_level=0.95) ~estimator ~n_iter vs =
42 |     if confidence_level <= 0. || confidence_level >= 1.
43 |     then invalid_arg "Bootstrap.bca: confidence level must be in range (0, 1)";
44 |
45 |     let point = estimator vs in
46 |     if Array.length vs = 1
47 |     then { point; lower_bound = point; upper_bound = point; confidence_level }
48 |     else
49 |       let rvs = resample ?rng ~estimator ~n_iter vs in
50 |       Array.sort ~cmp:compare rvs;  (* Sort, since we need percentiles. *)
51 |
52 |       let jack = jackknife ~estimator vs in
53 |       let jack_mean = Sample.mean jack in
54 |       (* The acceleration estimate needs the *signed* jackknife
55 |          deviations d_i = jack_mean -. jack_i:
56 |            a-hat = sum d_i^3 /. (6 *. (sum d_i^2)^1.5).
57 |          Squaring the deviations up front would lose the sign and turn
58 |          the cube sum into a sum of fourth powers. *)
59 |       let ds = Array.map jack ~f:(fun v -> jack_mean -. v) in
60 |       let sum_cubes =
61 |         Array.fold_left ds ~init:0. ~f:(fun acc d -> acc +. cube d)
62 |       and sum_squares =
63 |         Array.fold_left ds ~init:0. ~f:(fun acc d -> acc +. sqr d)
64 |       in
65 |       let accel = sum_cubes /. (6. *. (sum_squares ** 1.5)) in
66 |       let p = float_of_int (Array.count ~f:(fun v -> v < point) rvs) /.
67 |         float_of_int n_iter
68 |       in
69 |
70 |       let open Distributions.Normal in
71 |       let bias = quantile standard ~p
72 |       and z = quantile standard ~p:((1. -. confidence_level) /. 2.) in
73 |       let b1 = bias +. z
74 |       and b2 = bias -. z in
75 |       let a1 = bias +. b1 /. (1. -. accel *. b1)
76 |       and a2 = bias +. b2 /. (1. -. accel *. b2) in
77 |
78 |       (* Maps an adjusted z-score to an index into the sorted bootstrap
79 |          estimates; both indices are clamped to the valid range. *)
80 |       let pnorm x =
81 |         round (cumulative_probability standard ~x *. float_of_int n_iter)
82 |       in {
83 |         point;
84 |         lower_bound = rvs.(min (max (pnorm a1) 0) (n_iter - 1));
85 |         upper_bound = rvs.(max (min (pnorm a2) (n_iter - 1)) 0);
86 |         confidence_level
87 |       }
88 | end
89 |
--------------------------------------------------------------------------------
/lib/resampling.mli:
--------------------------------------------------------------------------------
1 | (** Resampling statistics. *)
2 |
3 | (** Repeatedly resamples a given data set with replacement, computing a
4 |     statistical estimate over the resampled data. *)
5 | val resample
6 |   : ?rng:Gsl.Rng.t
7 |   -> estimator:('a array -> 'b)
8 |   -> n_iter:int
9 |   -> 'a array
10 |   -> 'b array
11 |
12 | (** Repeatedly computes a statistical estimate over the data set, leaving
13 |     out a single observation at a time. *)
14 | val jackknife : estimator:('a array -> 'b) -> 'a array -> 'b array
15 |
16 |
17 | module Bootstrap : sig
18 |   type estimate = {
19 |     point : float;            (** Point estimate. *)
20 |     lower_bound : float;      (** Lower bound of the estimate confidence
21 |                                   interval *)
22 |     upper_bound : float;      (** Upper bound of the estimate confidence
23 |                                   interval *)
24 |     confidence_level : float  (** Confidence level corresponding to the
25 |                                   above intervals. *)
26 |   }
27 |
28 |   (** Bias-corrected and accelerated (BCa) bootstrap.
*) 29 | val bca 30 | : ?rng:Gsl.Rng.t 31 | -> ?confidence_level:float 32 | -> estimator:(float array -> float) 33 | -> n_iter:int 34 | -> float array 35 | -> estimate 36 | end 37 | -------------------------------------------------------------------------------- /lib/sample.ml: -------------------------------------------------------------------------------- 1 | open Internal 2 | 3 | module Stats = Gsl.Stats 4 | module Histo = Gsl.Histo 5 | 6 | let min = Stats.min 7 | and max = Stats.max 8 | and minmax = Stats.minmax 9 | let range vs = 10 | let (min, max) = minmax vs in 11 | max -. min 12 | 13 | let moments k vs = 14 | let n = Array.length vs in 15 | if n = 0 16 | then Array.make k nan 17 | else 18 | let ms = Array.make k 0. in begin 19 | for i = 0 to n - 1 do 20 | let v = Array.unsafe_get vs i in 21 | let acc = ref v in 22 | for p = 0 to k - 1 do 23 | Array.unsafe_set ms p 24 | (Array.unsafe_get ms p +. !acc); 25 | acc := !acc *. v 26 | done 27 | done; 28 | 29 | for p = 0 to k - 1 do 30 | Array.unsafe_set ms p (Array.unsafe_get ms p /. float_of_int n) 31 | done; ms 32 | end 33 | 34 | let mean vs = Stats.mean vs 35 | let variance ?mean vs = Stats.variance ?mean vs 36 | let sd ?mean vs = Stats.sd ?mean vs 37 | 38 | let skewness ?mean ?sd vs = match (mean, sd) with 39 | | (Some mean, Some sd) -> Stats.skew_m_sd ~mean ~sd vs 40 | | (Some mean, None) -> Stats.skew_m_sd ~mean ~sd:(Stats.sd ~mean vs) vs 41 | | (None, Some sd) -> Stats.skew_m_sd ~mean:(Stats.mean vs) ~sd vs 42 | | (None, None) -> Stats.skew vs 43 | 44 | let kurtosis ?mean ?sd vs = match (mean, sd) with 45 | | (Some mean, None) -> 46 | Stats.kurtosis_m_sd ~mean ~sd:(Stats.sd ~mean vs) vs 47 | | (None, Some sd) -> 48 | Stats.kurtosis_m_sd ~mean:(Stats.mean vs) ~sd vs 49 | | (Some mean, Some sd) -> Stats.kurtosis_m_sd ~mean ~sd vs 50 | | (None, None) -> Stats.kurtosis vs 51 | 52 | 53 | let _resolve_ties next d = function 54 | | `Average -> float_of_int next -. float_of_int d /. 2. 
55 | | `Min -> float_of_int (next - d) 56 | | `Max -> float_of_int next 57 | 58 | let _correct_ties ranks = 59 | let n = Array.length ranks in 60 | if n < 2 61 | then 1.0 62 | else 63 | let sorted = Array.copy ranks 64 | and t = ref 0 65 | and d = ref 0 66 | and i = ref 0 in begin 67 | Array.sort ~cmp:(fun r1 r2 -> 68 | compare (int_of_float r1) (int_of_float r2)) sorted; 69 | 70 | while !i < n - 1 do 71 | if sorted.(!i) = sorted.(!i + 1) 72 | then begin 73 | d := 1; 74 | while !i < n - 1 && sorted.(!i) = sorted.(!i + 1) do 75 | incr d; 76 | incr i; 77 | done; 78 | 79 | t := !t + (!d * !d * !d - !d) 80 | end; incr i 81 | done; float_of_int !t 82 | end 83 | 84 | let rank ?(ties_strategy=`Average) ?(cmp=compare) vs = 85 | let n = Array.length vs in 86 | let order = Array.sort_index ~cmp vs in 87 | let ranks = Array.make n 0. in 88 | let d = ref 0 in begin 89 | for i = 0 to n - 1 do 90 | if i == n - 1 || cmp vs.(order.(i)) vs.(order.(i + 1)) <> 0 91 | then 92 | let tie_rank = _resolve_ties (i + 1) !d ties_strategy in 93 | for j = i - !d to i do 94 | ranks.(order.(j)) <- tie_rank 95 | done; 96 | d := 0 97 | else 98 | incr d (* Found a duplicate! *) 99 | done; 100 | end; (_correct_ties ranks, ranks) 101 | 102 | 103 | let histogram ?(n_bins=10) ?range ?weights ?(density=false) vs = 104 | if n_bins <= 0 105 | then invalid_arg "Sample.histogram: n_bins must be a positive integer" 106 | else if vs = [||] then invalid_arg "Sample.histogram: no data" 107 | else begin 108 | match weights with 109 | | Some weights when Array.length weights <> Array.length vs -> 110 | invalid_arg "Sample.histogram: expected one weight per value" 111 | | _ -> () 112 | end; 113 | 114 | let h = Histo.make n_bins in begin 115 | match range with 116 | | None -> 117 | let (min, max) = minmax vs in 118 | let d = 119 | if n_bins = 1 120 | then 0. 121 | else (max -. min) /. float_of_int ((n_bins - 1) * 2) 122 | in Histo.set_ranges_uniform h ~xmin:(min -. d) ~xmax:(max +. 
d) 123 | | Some (min, max) when min < max -> 124 | Histo.set_ranges_uniform h ~xmin:min ~xmax:max 125 | | Some (_min, _max) -> 126 | invalid_arg ("Sample.histogram: max must be larger than min " ^ 127 | " in range paramter") 128 | end; 129 | 130 | for i = 0 to Array.length vs - 1 do 131 | let w = match weights with 132 | | None -> 1. 133 | | Some weights -> Array.unsafe_get weights i 134 | in Histo.accumulate h ~w (Array.unsafe_get vs i) 135 | done; 136 | 137 | let counts = Array.make n_bins 0. in 138 | let points = Array.make n_bins 0. in begin 139 | if density then Histo.scale h (1. /. Histo.sum h); 140 | for i = 0 to n_bins - 1 do 141 | Array.unsafe_set counts i (Histo.get h i); 142 | Array.unsafe_set points i (fst (Histo.get_range h i)) 143 | done; (points, counts) 144 | end 145 | 146 | 147 | module Quantile = struct 148 | type continuous_param = 149 | | CADPW 150 | | Hazen 151 | | SPSS 152 | | S 153 | | MedianUnbiased 154 | | NormalUnbiased 155 | 156 | let continuous_by ?(param=S) ~ps vs = 157 | if Array.exists ~f:(fun p -> p < 0. || p > 1.) ps 158 | then invalid_arg "Quantile.continuous_by: p must be in range [0, 1]"; 159 | if Array.exists ~f:is_nan vs 160 | then invalid_arg "Quantile.continuous_by: sample contains NaNs"; 161 | 162 | let (a, b) = match param with 163 | | CADPW -> (0., 1.) 164 | | Hazen -> (0.5, 0.5) 165 | | SPSS -> (0., 0.) 166 | | S -> (1., 1.) 167 | | MedianUnbiased -> (1. /. 3., 1. /. 3.) 168 | | NormalUnbiased -> (3. /. 8., 3. /. 8.) 169 | in 170 | 171 | let n = Array.length vs in 172 | let fuzz = epsilon_float *. 4. 173 | and js = Array.make n 0. 174 | and hs = Array.make n 0. in begin 175 | for i = 0 to Array.length ps - 1 do 176 | let p = Array.unsafe_get ps i in 177 | let nppm = a +. p *. (float_of_int n +. 1. -. a -. b) in 178 | let j = floor (nppm +. fuzz) in 179 | let h = if abs_float (nppm -. j) < fuzz 180 | then 0. 181 | else nppm -. 
j 182 | in begin 183 | Array.unsafe_set js i j; 184 | Array.unsafe_set hs i h; 185 | end 186 | done 187 | end; 188 | 189 | let bound ?(a=0) ~b i = Pervasives.min (Pervasives.max i a) b in 190 | let svs = Gsl.Gsl_sort.vector_flat_smallest 191 | (bound ~b:n (int_of_float (max js) + 1)) 192 | (Gsl.Vector_flat.of_array vs) in 193 | let item = fun i -> svs.(bound ~b:(n - 1) i) 194 | and qs = Array.make (Array.length ps) 0. in begin 195 | for i = 0 to Array.length ps - 1 do 196 | let j = int_of_float (Array.unsafe_get js i) in 197 | let h = Array.unsafe_get hs i in 198 | let q = (1. -. h) *. item (j - 1) +. h *. item j in 199 | Array.unsafe_set qs i q 200 | done; qs 201 | end 202 | 203 | let iqr ?param vs = 204 | match continuous_by ?param ~ps:[|0.25; 0.75|] vs with 205 | | [|q25; q75|] -> q75 -. q25 206 | | _ -> assert false (* Impossible. *) 207 | end 208 | 209 | let quantile ~ps vs = Quantile.continuous_by ~ps vs 210 | 211 | let iqr vs = Quantile.iqr vs 212 | 213 | 214 | module KDE = struct 215 | type bandwidth = 216 | | Silverman 217 | | Scott 218 | 219 | type kernel = 220 | | Gaussian 221 | 222 | let build_kernel = function 223 | | Gaussian -> 224 | fun h p v -> 225 | let u = (v -. p) /. h in 226 | let open Gsl.Math in 227 | (1. /. (sqrt2 *. sqrtpi)) *. exp (-. sqr u /. 2.) 228 | 229 | let build_points n_points h kernel vs = 230 | let (min, max) = minmax vs in 231 | let (a, b) = match kernel with 232 | | Gaussian -> (min -. 3. *. h, max +. 3. *. h) 233 | in 234 | 235 | let points = Array.make n_points 0. 236 | and step = (b -. a) /. float_of_int n_points in begin 237 | for i = 0 to n_points - 1 do 238 | Array.unsafe_set points i (a +. (float_of_int i) *. 
step) 239 | done; points 240 | end 241 | 242 | let estimate_pdf ?(kernel=Gaussian) ?(bandwidth=Scott) ?(n_points=512) vs = 243 | if Array.length vs < 2 244 | then invalid_arg "KDE.estimate_pdf: sample should have multiple elements"; 245 | 246 | let n = float_of_int (Array.length vs) in 247 | let s = Pervasives.min (sd vs) (iqr vs /. 1.34) in 248 | let h = match bandwidth with 249 | | Silverman -> 0.90 *. s *. (n ** -0.2) 250 | | Scott -> 1.06 *. s *. (n ** -0.2) 251 | in 252 | 253 | let points = build_points n_points h kernel vs in 254 | let k = build_kernel kernel in 255 | let f = 1. /. (h *. n) in 256 | let pdf = Array.make n_points 0. in begin 257 | for i = 0 to n_points - 1 do 258 | let p = Array.unsafe_get points i in 259 | Array.unsafe_set pdf i 260 | (f *. Array.fold_left ~f:(fun acc v -> acc +. k h p v) ~init:0. vs) 261 | done; (points, pdf) 262 | end 263 | end 264 | 265 | 266 | module Correlation = struct 267 | let pearson v1 v2 = 268 | let n = Array.length v1 in 269 | if Array.length v2 <> n 270 | then invalid_arg "Correlation.pearson: unequal length arrays"; 271 | 272 | let v1_mean = mean v1 273 | and v2_mean = mean v2 in 274 | let v12_sd = sd ~mean:v1_mean v1 *. sd ~mean:v2_mean v2 275 | and acc = ref 0. in 276 | for i = 0 to n - 1 do 277 | let v1 = Array.unsafe_get v1 i 278 | and v2 = Array.unsafe_get v2 i 279 | in acc := !acc +. (v1 -. v1_mean) *. (v2 -. v2_mean) 280 | done; !acc /. float_of_int (n - 1) /. v12_sd 281 | 282 | let spearman ?cmp v1 v2 = 283 | let n = Array.length v1 in 284 | if Array.length v2 <> n 285 | then invalid_arg "Correlation.spearman: unequal length arrays" 286 | else 287 | (* Note(superbobry): according to Wikipedia, ties strategy is 288 | fixed to [`Average]. 
*) 289 | let f vs = snd (rank ~ties_strategy:`Average ?cmp vs) in 290 | pearson (f v1) (f v2) 291 | 292 | module Auto = struct 293 | let pearson vs = 294 | let n = Array.length vs in 295 | if n < 2 296 | then [||] 297 | else 298 | let mean = Stats.mean vs in 299 | let acf shift = 300 | let acc = ref 0. in 301 | for i = 0 to n - shift - 1 do 302 | let v_i = Array.unsafe_get vs i 303 | and v_s = Array.unsafe_get vs (i + shift) in 304 | acc := !acc +. (v_s -. mean) *. (v_i -. mean) 305 | done; !acc /. float_of_int n 306 | in 307 | 308 | let ac = Array.init n ~f:acf in 309 | let ac0 = ac.(0) in begin 310 | for i = 0 to n - 1 do 311 | Array.unsafe_set ac i (Array.unsafe_get ac i /. ac0) 312 | done; ac 313 | end 314 | end 315 | end 316 | 317 | 318 | module Summary = struct 319 | type t = { 320 | k : int; 321 | m_1 : float; 322 | m_2 : float; 323 | m_3 : float; 324 | m_4 : float; 325 | max_k : float; 326 | min_k : float 327 | } 328 | 329 | let empty = { 330 | m_1 = 0.0; 331 | m_2 = 0.0; 332 | m_3 = 0.0; 333 | m_4 = 0.0; 334 | k = 0; 335 | max_k = min_float; 336 | min_k = max_float; 337 | } 338 | 339 | let combine t1 t2 = 340 | if t1.k = 0 341 | then t2 342 | else if t2.k = 0 343 | then t1 344 | else 345 | (* Note(superbobry): we can optimize it later, but right now I'd 346 | rather have these formulas match Wikipedia _directly_. *) 347 | let delta = t2.m_1 -. t1.m_1 348 | and t1k = float_of_int t1.k 349 | and t2k = float_of_int t2.k 350 | and tnk = float_of_int (t1.k + t2.k) 351 | in { 352 | m_1 = (t1k *. t1.m_1 +. t2k *. t2.m_1) /. tnk; 353 | m_2 = t1.m_2 +. t2.m_2 +. sqr delta *. t1k *. t2k /. tnk; 354 | m_3 = t1.m_3 +. t2.m_3 355 | +. cube delta *. t1k *. t2k *. (t1k -. t2k) /. sqr tnk 356 | +. 3. *. delta *. (t1k *. t2.m_2 -. t2k *. t1.m_2) /. tnk; 357 | m_4 = t1.m_4 +. t2.m_4 358 | +. ((delta ** 4.) *. t1k *. t2k *. 359 | (sqr t1k -. t1k *. t2k +. sqr t2k) /. cube tnk) 360 | +. (6. *. sqr delta *. 361 | (sqr t1k *. t2.m_2 +. sqr t2k *. t1.m_2) /. sqr tnk) 362 | +. 
(4. *. delta *. (t1k *. t2.m_3 -. t2k *. t1.m_3) /. tnk); 363 | k = t1.k + t2.k; 364 | max_k = Pervasives.max t1.max_k t2.max_k; 365 | min_k = Pervasives.min t1.min_k t2.min_k 366 | } 367 | 368 | let add t x_k = 369 | let n_k = float_of_int (succ t.k) in 370 | let delta = x_k -. t.m_1 in 371 | let delta_nk = delta /. n_k in 372 | let delta_m_2 = delta *. delta_nk *. float_of_int t.k in 373 | { 374 | m_1 = t.m_1 +. delta_nk; 375 | m_2 = t.m_2 +. delta_m_2; 376 | m_3 = t.m_3 +. 377 | delta_m_2 *. delta_nk *. (n_k -. 2.) -. 378 | (3. *. delta_nk *. t.m_2); 379 | m_4 = t.m_4 +. 380 | delta_m_2 *. sqr delta_nk *. (sqr n_k -. 3. *. n_k +. 3.) +. 381 | 6. *. sqr delta_nk *. t.m_2 -. 382 | 4. *. delta_nk *. t.m_3; 383 | k = succ t.k; 384 | min_k = Pervasives.min t.min_k x_k; 385 | max_k = Pervasives.max t.max_k x_k; 386 | } 387 | 388 | let size { k; _ } = k 389 | 390 | let min { min_k; _ } = if min_k = max_float then nan else min_k 391 | and max { max_k; _ } = if max_k = min_float then nan else max_k 392 | 393 | let mean t = if t.k > 0 then t.m_1 else nan 394 | 395 | let variance t = match t.k with 396 | (* Note(superbobry): we follow R and GSL here and treat variance 397 | of a single number undefined. *) 398 | | 0 | 1 -> nan 399 | | _ -> t.m_2 /. float_of_int (pred t.k) 400 | let sd t = sqrt (variance t) 401 | 402 | let skewness t = 403 | if t.k = 0 404 | then nan 405 | else sqrt (float_of_int t.k) *. t.m_3 /. (t.m_2 ** 1.5) 406 | and kurtosis t = 407 | if t.k = 0 408 | then nan 409 | else float_of_int t.k *. t.m_4 /. (t.m_2 *. t.m_2) -. 3. 410 | end 411 | -------------------------------------------------------------------------------- /lib/sample.mli: -------------------------------------------------------------------------------- 1 | (** Commonly used sample statistics. *) 2 | 3 | val min : float array -> float 4 | val max : float array -> float 5 | val minmax : float array -> (float * float) 6 | 7 | (** {e O(n)} Computes sample's range, i. e. 
the difference between the 8 | largest and smallest elements of a sample. *) 9 | val range : float array -> float 10 | 11 | (** {e O(n k)} Computes an array of sample moments of order 1 to k, i. e. 12 | [E{X^1}, E{X^2}, ..., E{X^k}]. *) 13 | val moments : int -> float array -> float array 14 | 15 | (** {e O(n)} Computes sample's arithmetic mean. *) 16 | val mean : float array -> float 17 | 18 | (** {e O(n)} Computes unbiased estimate of a sample's variance, also 19 | known as the {e sample variance}, where the denominator is [n - 1]. *) 20 | val variance : ?mean:float -> float array -> float 21 | 22 | (** {e O(n)} Computes sample's standard deviation. *) 23 | val sd : ?mean:float -> float array -> float 24 | 25 | (** {e O(n)} Computes the skewness of a sample, which is a measure of 26 | asymmetry of its distribution. *) 27 | val skewness : ?mean:float -> ?sd:float -> float array -> float 28 | 29 | (** {e O(n)} Computes the excess kurtosis of a sample, which is a 30 | measure of a "peakedness" of its distribution. *) 31 | val kurtosis : ?mean:float -> ?sd:float -> float array -> float 32 | 33 | 34 | (** {e O(n log n)} Computes sample's ranks, [ties_strategy] controls 35 | which ranks are assigned to equal values: 36 | 37 | - [`Average] the average of ranks should be assigned to each value. 38 | {b Default}. 39 | - [`Min] the minimum of ranks is assigned to each value. 40 | - [`Max] the maximum of ranks is assigned to each value. 41 | 42 | Returns a pair, where the first element is ties correction factor 43 | and second is an array of sample ranks. 44 | 45 | {b References} 46 | 47 | + P. R. Freeman, "Algorithm AS 26: Ranking an array of numbers", 48 | Vol. 19, Applied Statistics, pp111-113, 1970. *) 49 | val rank 50 | : ?ties_strategy:[`Average | `Min | `Max] 51 | -> ?cmp:('a -> 'a -> int) 52 | -> 'a array 53 | -> (float * float array) 54 | 55 | (** {e O(n)} Computes histogram of a data set. 
Bin sizes are uniform, 56 | based on a given [range], whic defaults to 57 | [(min - k, max + k)], where [k = (min - max) / (bins - 1) * 2]. 58 | This behaviour is copied from the excellent 59 | {{: http://github.com/bos/statistics} statistics} library by 60 | Brian O'Sullivan. *) 61 | val histogram 62 | : ?n_bins:int 63 | -> ?range:(float * float) 64 | -> ?weights:float array 65 | -> ?density:bool 66 | -> float array 67 | -> (float array * float array) 68 | 69 | 70 | module Quantile : sig 71 | (** Parameters for the continious sample method. *) 72 | type continuous_param = 73 | | CADPW (** Linear interpolation of the {e ECDF}. *) 74 | | Hazen (** Hazen's definition. *) 75 | | SPSS (** Definition used by the SPSS statistics application, 76 | also known as Weibull's definition. *) 77 | | S (** Definition used by the S statistics application.org 78 | Interpolation points divide the sample range into 79 | [n - 1] intervals. {b Default}. *) 80 | | MedianUnbiased (** Median unbiased definition. The resulting quantile 81 | estimates are approximately median unbiased 82 | regardless of the distribution of [vs] *) 83 | | NormalUnbiased (** Normal unbiased definition. An approximately unbiased 84 | estimate if the empirical distribution approximates 85 | the normal distribution. *) 86 | 87 | (** {e O(n log n)} Estimates sample quantile corresponding to the given 88 | probability [p], using the continuous sample method with given 89 | parameters. *) 90 | val continuous_by 91 | : ?param:continuous_param -> ps:float array -> float array -> float array 92 | 93 | (** {e O(n log n)} Estimates interquantile range of a given sample, 94 | using the continuous sample method with given parameters. *) 95 | val iqr : ?param:continuous_param -> float array -> float 96 | end 97 | 98 | (** {e O(n log n)} Estimates sample quantile corresponding to the given 99 | probability [p], using the continuous sample method with default 100 | parameters. 
*) 101 | val quantile : ps:float array -> float array -> float array 102 | 103 | (** {e O(n log n)} Estimates interquantile range of a given sample, 104 | using the continuous sample method with given parameters. *) 105 | val iqr : float array -> float 106 | 107 | 108 | module KDE : sig 109 | (** Bandwidth selection rules. *) 110 | type bandwidth = 111 | | Silverman (** Use {e rule-of-thumb} for choosing the bandwidth. 112 | It defaults to 113 | [0.9 * min(SD, IQR / 1.34) * n^-0.2]. *) 114 | | Scott (** Same as [Silverman], but with a factor, equal to 115 | [1.06]. *) 116 | 117 | type kernel = 118 | | Gaussian 119 | 120 | (** {e O(n * points)} Simple kernel density estimator. Returns an array 121 | of uniformly spaced points from the sample range at which the 122 | density function was estimated, and the estimates at these points. *) 123 | val estimate_pdf 124 | : ?kernel:kernel 125 | -> ?bandwidth:bandwidth 126 | -> ?n_points:int 127 | -> float array 128 | -> (float array * float array) 129 | 130 | (** {6 Example} 131 | 132 | {[ 133 | open Pareto 134 | let open Distributions.Normal in 135 | let vs = sample ~size:100 standard in 136 | let (points, pdf) = Sample.KDE.estimate_pdf ~points:10 vs in begin 137 | (* Output an ASCII density plot. *) 138 | Array.iteri (fun i d -> 139 | let count = int_of_float (d *. 20.) in 140 | printf "%9.5f " points.(i); 141 | for i = 0 to count do 142 | print_char (if i = count then '.' else ' '); 143 | done; 144 | 145 | print_newline (); 146 | ) pdf 147 | end 148 | ]} 149 | 150 | {6 References} 151 | 152 | + B.W. Silverman, "Density Estimation for Statistics and Data 153 | Analysis", Vol. 26, Monographs on Statistics and Applied 154 | Probability, Chapman and Hall, London, 1986. *) 155 | end 156 | 157 | 158 | module Correlation : sig 159 | (** {e O(n)} Computes Pearson product-moment correlation coefficient 160 | for two given samples. 
*) 161 | val pearson : float array -> float array -> float 162 | 163 | (** {e O(n log n)} Computes Spearman rank correlation coefficient for 164 | two given samples, which is essentially Pearson correlation 165 | calculated for sample ranks. *) 166 | val spearman : ?cmp:('a -> 'a -> int) -> 'a array -> 'a array -> float 167 | 168 | (** Autocorrelation, i. e. the correlation of the sample against a 169 | shifted version of itself. *) 170 | module Auto : sig 171 | (** {e O(n^2)} Computes autocorrelation, using Person product-moment 172 | correlation coefficient. *) 173 | val pearson : float array -> float array 174 | end 175 | end 176 | 177 | 178 | (** Calculates summary statistics over a possibly infinite stream of data. 179 | 180 | The algorithm runs in {e O(1)} space and {e O(n)} time. 181 | 182 | It is preferred for computing standard deviation, because roundoff 183 | errors in floating point operations might lead to taking a square 184 | root of a negative value. 185 | 186 | {6 References} 187 | 188 | + D. E. Knuth, "The Art of Computer Programming, Volume 2: 189 | Seminumerical Algorithms", 2nd edition, Section 4.2.2, p216. 190 | + T. B. Terriberry, "Computing Higher-Order Moments Online", 2008, 191 | http://people.xiph.org/~tterribe/notes/homs.html *) 192 | module Summary : sig 193 | type t 194 | 195 | (** Empty data set. *) 196 | val empty : t 197 | 198 | (** Combines statistics from two data sets. *) 199 | val combine : t -> t -> t 200 | 201 | (** Adds a value to the data set. *) 202 | val add : t -> float -> t 203 | 204 | (** Returns the maximum added value or [nan] if the data set is empty. *) 205 | val max : t -> float 206 | 207 | (** Returns the minimum added value or [nan] is the data set is empty. *) 208 | val min : t -> float 209 | 210 | (** Returns the number of available values. *) 211 | val size : t -> int 212 | 213 | (** Returns the arithmetic mean of the values that have been added 214 | or [nan] if the data set is empty. 
*) 215 | val mean : t -> float 216 | 217 | (** Returns the variance of the available values or [nan] if the 218 | data set is empty. *) 219 | val variance : t -> float 220 | 221 | (** Returns the standard deviation of the values that have been added 222 | or [nan] if the data set is empty. *) 223 | val sd : t -> float 224 | 225 | (** Returns the skewness of the values that have been added or [nan] if 226 | the data set is empty. 227 | 228 | {b Note}: for small sample sizes estimated value might be inaccurate, 229 | See issue #20. *) 230 | val skewness : t -> float 231 | 232 | (** Returns the excess kurtosis of the values that have been added 233 | or [nan] if the data set is empty. 234 | 235 | {b Note}: for small sample sizes estimated value might be inaccurate, 236 | See issue #20. *) 237 | val kurtosis : t -> float 238 | end 239 | -------------------------------------------------------------------------------- /lib/tests.ml: -------------------------------------------------------------------------------- 1 | open Internal 2 | 3 | type test_alternative = Less | Greater | TwoSided 4 | 5 | type test_result = { 6 | test_statistic : float; 7 | test_pvalue : float 8 | } 9 | 10 | let run_test ?(significance_level=0.05) f = 11 | let { test_pvalue = pvalue; _ } = f () in 12 | if pvalue <= significance_level 13 | then `NotSignificant 14 | else `Significant 15 | 16 | 17 | module T = struct 18 | let finalize d t alternative = 19 | let open Distributions.T in 20 | let pvalue = match alternative with 21 | | Less -> cumulative_probability d ~x:t 22 | | Greater -> 1. -. cumulative_probability d ~x:t 23 | | TwoSided -> 2. *. cumulative_probability d ~x:(-. (abs_float t)) 24 | in { test_statistic = t; test_pvalue = pvalue } 25 | 26 | let one_sample v ?(mean=0.) ?(alternative=TwoSided) () = 27 | let n = float_of_int (Array.length v) in 28 | (* Note(superbobry): R uses uncorrected sample variance, so the 29 | exact value of t-statistic might be slightly different. 
*) 30 | let t = (Sample.mean v -. mean) *. sqrt (n /. Sample.variance v) 31 | in finalize (Distributions.T.create ~df:(n -. 1.)) t alternative 32 | 33 | let two_sample_independent v1 v2 34 | ?(equal_variance=true) ?(mean=0.) ?(alternative=TwoSided) () = 35 | let n1 = float_of_int (Array.length v1) 36 | and n2 = float_of_int (Array.length v2) 37 | and (var1, var2) = (Sample.variance v1, Sample.variance v2) in 38 | 39 | let (df, denom) = if equal_variance 40 | then 41 | let df = n1 +. n2 -. 2. in 42 | let var12 = ((n1 -. 1.) *. var1 +. (n2 -. 1.) *. var2) /. df 43 | in (df, sqrt (var12 *. (1. /. n1 +. 1. /. n2))) 44 | else 45 | let vn1 = var1 /. n1 in 46 | let vn2 = var2 /. n2 in 47 | let df = 48 | sqr (vn1 +. vn2) /. (sqr vn1 /. (n1 -. 1.) +. sqr vn2 /. (n2 -. 1.)) 49 | in (df, sqrt (vn1 +. vn2)) 50 | in 51 | 52 | let t = (Sample.mean v1 -. Sample.mean v2 -. mean) /. denom in 53 | finalize (Distributions.T.create ~df) t alternative 54 | 55 | let two_sample_paired v1 v2 ?(mean=0.) ?(alternative=TwoSided) () = 56 | let n = Array.length v1 in 57 | if n <> Array.length v2 58 | then invalid_arg "T.two_sample_paired: unequal length arrays"; 59 | one_sample (Array.mapi ~f:(fun i x -> x -. v2.(i)) v1) ~mean ~alternative () 60 | end 61 | 62 | module ChiSquared = struct 63 | let finalize d chisq = 64 | let open Distributions.ChiSquared in 65 | { 66 | test_statistic = chisq; 67 | test_pvalue = 1. -. cumulative_probability ~x:chisq d 68 | } 69 | 70 | let goodness_of_fit observed ?expected ?(df=0) () = 71 | let n = Array.length observed in 72 | let expected = match expected with 73 | | Some expected -> 74 | if Array.length expected <> n 75 | then invalid_arg "ChiSquared.goodness_of_fit: unequal length arrays" 76 | else 77 | (* TODO(superbobry): make sure we have wellformed frequencies. *) 78 | expected 79 | | None -> 80 | Array.(make n (fold_left observed ~f:(+.) ~init:0. /. float_of_int n)) 81 | and chisq = ref 0. in begin 82 | for i = 0 to n - 1 do 83 | chisq := !chisq +. 
sqr (observed.(i) -. expected.(i)) /. expected.(i) 84 | done 85 | end; 86 | 87 | finalize (Distributions.ChiSquared.create ~df:(n - 1 - df)) !chisq 88 | 89 | let independence observed ?(correction=false) () = 90 | let observed = Matrix_flat.of_arrays observed in 91 | let (m, n) = Matrix_flat.dims observed in 92 | if m = 0 || n = 0 then invalid_arg "ChiSquared.independence: no data" 93 | else if Matrix_flat.exists ~f:(fun x -> x < 0.) observed 94 | then invalid_arg ("ChiSquared.independence: observed values must " ^ 95 | "be non negative"); 96 | 97 | let expected = Matrix_flat.create m n in 98 | let open Gsl.Blas_flat in 99 | gemm ~ta:Trans ~tb:NoTrans ~alpha:(1. /. Matrix_flat.sum observed) ~beta:1. 100 | ~a:(Matrix_flat.row_sums observed) 101 | ~b:(Matrix_flat.col_sums observed) 102 | ~c:expected; 103 | 104 | if Matrix_flat.exists ~f:((=) 0.) expected 105 | then invalid_arg ("ChiSquared.independence: computed expected " ^ 106 | " frequencies matrix has a zero element"); 107 | 108 | match (m - 1) * (n - 1) with 109 | | 0 -> 110 | (* This degenerate case is shamelessly ripped of from SciPy 111 | 'chi2_contingency' function. *) 112 | { test_statistic = 0.; test_pvalue = 1. } 113 | | df -> 114 | let chisq = 115 | let open Matrix_flat in 116 | let t = create m n in begin 117 | memcpy ~src:expected ~dst:t; 118 | sub t observed; 119 | if df = 1 && correction then begin 120 | map t ~f:abs_float; (* Use Yates' correction for continuity. *) 121 | add_constant t (-. 0.5) 122 | end; 123 | mul_elements t t; 124 | div_elements t expected; 125 | sum t 126 | end 127 | in finalize (Distributions.ChiSquared.create ~df) chisq 128 | end 129 | 130 | module KolmogorovSmirnov = struct 131 | let create_h n d = 132 | let k = floor (float_of_int n *. d) +. 1. in 133 | let h = k -. float_of_int n *. d in 134 | let size = 2 * int_of_float k - 1 in 135 | let m = Matrix_flat.create ~init:0. size size in begin 136 | (* Set all element in the lower triangle to 1. 
*) 137 | for i = 0 to size - 1 do 138 | for j = max 0 (i - 1) to size - 1 do 139 | Matrix_flat.set m (size - 1 - i) (size - 1 - j) 1. 140 | done 141 | done; 142 | 143 | (* Pre-calculate 'h' powers for the first column and bottom row. *) 144 | let h_powers = Array.make size 0. in 145 | Array.unsafe_set h_powers 0 h; 146 | for i = 1 to size - 1 do 147 | Array.unsafe_set h_powers i 148 | ((Array.unsafe_get h_powers (i - 1)) *. h) 149 | done; 150 | 151 | (* Correct first column and bottom row. *) 152 | for i = 0 to size - 1 do 153 | Matrix_flat.set m i 0 (Matrix_flat.get m i 0 -. Array.get h_powers i); 154 | Matrix_flat.set m (size - 1) i 155 | (Matrix_flat.get m (size - 1) i -. Array.get h_powers (size - 1 - i)) 156 | done; 157 | 158 | (* Correct bottom left element if needed. *) 159 | if 2. *. h > 1. 160 | then Matrix_flat.set m (size - 1) 0 161 | (Matrix_flat.get m (size - 1) 0 +. 162 | (2. *. h -. 1.) ** float_of_int size); 163 | 164 | (* Here come factorials! *) 165 | let facts = Array.make size 1 in 166 | for i = 1 to size - 1 do 167 | Array.unsafe_set facts i (Array.unsafe_get facts (i - 1) * (i + 1)) 168 | done; 169 | 170 | for i = 0 to size - 1 do 171 | for j = i + 1 to size - 1 do 172 | Matrix_flat.set m (size - 1 - i) (size - 1 - j) 173 | (Matrix_flat.get m (size - 1 - i) (size - 1 - j) /. 174 | float_of_int (Array.unsafe_get facts (j - i))) 175 | done 176 | done 177 | end; m 178 | 179 | let goodness_of_fit vs ~cumulative_probability:cp ?(alternative=TwoSided) () = 180 | let n = Array.length vs in 181 | if n < 1 then invalid_arg "KolmogorovSmirnov.goodness_of_fit: no data"; 182 | 183 | let is = Array.sort_index ~cmp:compare vs in 184 | let ds_plus = ref min_float 185 | and ds_minus = ref min_float in begin 186 | for i = 0 to n - 1 do 187 | let j = Array.unsafe_get is i in 188 | ds_minus := max !ds_minus 189 | (cp vs.(j) -. float_of_int i /. float_of_int n); 190 | ds_plus := max !ds_plus 191 | (float_of_int (i + 1) /. float_of_int n -. 
cp vs.(j)) 192 | done; 193 | end; 194 | 195 | let d = match alternative with 196 | | Less -> !ds_minus 197 | | Greater -> !ds_plus 198 | | TwoSided -> max !ds_minus !ds_plus 199 | in 200 | 201 | let pvalue = match alternative with 202 | | Less | Greater -> 203 | if d <= 0. 204 | then 0. 205 | else if d >= 1. 206 | then 1. 207 | else 208 | (* See Section 3 in Birnbaum and Tingey. *) 209 | let acc = ref 0. in begin 210 | for i = 0 to int_of_float (floor (float_of_int n *. (1. -. d))) do 211 | let j = float_of_int i in 212 | let s1 = (float_of_int n -. j) *. 213 | log (1. -. d -. j /. float_of_int n) 214 | and s2 = (j -. 1.) *. log (d +. j /. float_of_int n) 215 | in acc := !acc +. exp (Gsl.Sf.lnchoose n i +. s1 +. s2) 216 | done 217 | end; d *. !acc 218 | | TwoSided -> 219 | (* See code in Marsaglia et al. *) 220 | let s = float_of_int n *. d *. d in 221 | if s > 7.24 || (s > 3.76 && n > 99) 222 | then 223 | 1. -. 2. *. exp (-. (2.000071 +. 0.331 /. sqrt (float_of_int n) +. 224 | 1.409 /. float_of_int n) *. s) 225 | else 226 | (* FIXME(superbobry): control for overflow in matrix power 227 | calculation. *) 228 | let h = Matrix_flat.power (create_h n d) n in 229 | let k = int_of_float (ceil (float_of_int n *. d)) in 230 | let acc = ref (Matrix_flat.get h (k - 1) (k - 1)) in begin 231 | for i = 1 to n do 232 | acc := !acc *. float_of_int i /. float_of_int n 233 | done 234 | end; 1. -. !acc 235 | in { test_statistic = d; test_pvalue = pvalue } 236 | 237 | (* FIXME(superbobry): see the following bug for critique of 'psmirnov2x' 238 | implementation: 239 | https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=13848 *) 240 | let psmirnov2x d n1 n2 = 241 | let m = min n1 n2 242 | and n = max n1 n2 in 243 | let md = float_of_int m 244 | and nd = float_of_int n in 245 | let q = (0.5 +. floor (d *. md *. nd -. 1e-7)) /. (md *. nd) 246 | and u = Array.make (n + 1) 0. in begin 247 | for i = 0 to n do 248 | Array.unsafe_set u i (if float_of_int i /. nd > q then 0. else 1.) 
249 | done; 250 | 251 | for i = 1 to m do 252 | let w = float_of_int i /. (float_of_int i +. nd) in begin 253 | Array.unsafe_set u 0 254 | (if float_of_int i /. md > q 255 | then 0. 256 | else w *. Array.unsafe_get u 0); 257 | 258 | for j = 1 to n do 259 | Array.unsafe_set u j 260 | (if abs_float (float_of_int i /. md -. float_of_int j /. nd) > q 261 | then 0. 262 | else w *. Array.unsafe_get u j +. Array.unsafe_get u (j - 1)) 263 | done 264 | end 265 | done; 266 | 267 | Array.unsafe_get u n 268 | end 269 | 270 | let two_sample v1 v2 ?(alternative=TwoSided) () = 271 | let n1 = Array.length v1 272 | and n2 = Array.length v2 in 273 | if n1 = 0 || n2 = 0 274 | then invalid_arg "KolmogorovSmirnov.two_sample: no data"; 275 | 276 | let vs = Array.append v1 v2 in 277 | let is = Array.sort_index ~cmp:compare vs in 278 | let has_ties = ref false in 279 | for i = 1 to n1 + n2 - 1 do 280 | let j = Array.unsafe_get is i in 281 | let k = Array.unsafe_get is (i - 1) in 282 | has_ties := !has_ties || Array.unsafe_get vs j = Array.unsafe_get vs k 283 | done; 284 | 285 | if !has_ties 286 | then invalid_arg "KolmogorovSmirnov.two_sample: ties are not allowed"; 287 | 288 | let is = Array.sort_index ~cmp:compare vs in 289 | let acc = ref 0. 290 | and z_max = ref neg_infinity 291 | and z_min = ref infinity 292 | and z_abs = ref neg_infinity in begin 293 | for i = 0 to n1 + n2 - 1 do 294 | let j = Array.unsafe_get is i in 295 | acc := 296 | if j < n1 297 | then !acc +. 1. /. float_of_int n1 298 | else !acc -. 1. /. float_of_int n2; 299 | z_max := max !z_max !acc; 300 | z_min := min !z_min !acc; 301 | z_abs := max !z_abs (abs_float !acc) 302 | done 303 | end; 304 | 305 | let d = match alternative with 306 | | Less -> -. !z_min 307 | | Greater -> !z_max 308 | | TwoSided -> !z_abs 309 | in 310 | 311 | let pvalue = match alternative with 312 | | Less | Greater -> 313 | (* Note(superbobry): there's also a X^2 approximation for 314 | one-sided hypothesis, see: 315 | L. 
Goodman, Kolmogorov-Smirnov test for psycological research. *) 316 | exp (-. 2. *. float_of_int (n1 * n2) /. float_of_int (n1 + n2) *. 317 | d *. d) 318 | | TwoSided -> 319 | (* Note(superbobry): we follow R here and treat two-sided P-value 320 | as a probability that a random assignment of the pooled 321 | data to two sets of the same sizes as the input data sets 322 | has a KS test statistic at least as great as that of the 323 | input data. *) 324 | 1. -. psmirnov2x d n1 n2 325 | in { test_statistic = d; test_pvalue = pvalue } 326 | end 327 | 328 | module MannWhitneyU = struct 329 | let two_sample_independent v1 v2 330 | ?(alternative=TwoSided) ?(correction=true) () = 331 | let n1 = float_of_int (Array.length v1) 332 | and n2 = float_of_int (Array.length v2) in 333 | if n1 = 0. || n2 = 0. 334 | then invalid_arg "MannWhitneyU.two_sample_independent: no data"; 335 | 336 | let n = n1 +. n2 in 337 | let (t, ranks) = Sample.rank (Array.append v1 v2) in 338 | let w1 = Array.(fold_left (sub ranks ~pos:0 ~len:(int_of_float n1)) 339 | ~f:(+.) ~init:0.) 340 | and w2 = Array.(fold_left (sub ranks ~pos:(int_of_float n1) 341 | ~len:(int_of_float n2)) 342 | ~f:(+.) ~init:0.) 343 | in 344 | 345 | let u1 = w1 -. n1 *. (n1 +. 1.) /. 2. in 346 | let u2 = w2 -. n2 *. (n2 +. 1.) /. 2. in 347 | let u = min u1 u2 in 348 | assert (u1 +. u2 = n1 *. n2); 349 | 350 | (* Lower bounds for normal approximation were taken from 351 | Gravetter, Frederick J., and Larry B. Wallnau. 352 | "Statistics for the behavioral sciences". Wadsworth Publishing 353 | Company, 2006. *) 354 | if t <> 0. || (n1 > 20. && n2 > 20.) 355 | then 356 | (* Normal approximation. *) 357 | let mean = n1 *. n2 /. 2. in 358 | let sd = sqrt ((n1 *. n2 /. 12.) *. 359 | ((n +. 1.) -. t /. (n *. (n -. 1.)))) in 360 | let delta = 361 | if correction 362 | then match alternative with 363 | | Less -> -. 0.5 364 | | Greater -> 0.5 365 | | TwoSided -> if u > mean then 0.5 else -. 0.5 366 | else 0. 
367 | in 368 | 369 | let z = (u -. mean -. delta) /. sd in 370 | let open Distributions.Normal in 371 | let pvalue = match alternative with 372 | | Less -> cumulative_probability standard ~x:z 373 | | Greater -> 1. -. cumulative_probability standard ~x:z 374 | | TwoSided -> 375 | 2. *. (min (cumulative_probability standard ~x:z) 376 | (1. -. cumulative_probability standard ~x:z)) 377 | in { test_statistic = u; test_pvalue = pvalue } 378 | else 379 | (* Exact critical value. *) 380 | let k = int_of_float (min n1 n2) in 381 | let c = Gsl.Combi.make (int_of_float n) k in 382 | let c_n_k = Gsl.Sf.choose (int_of_float n) k in 383 | let le = ref 0 in 384 | let ge = ref 0 in 385 | begin 386 | for _i = 0 to int_of_float c_n_k - 1 do 387 | let cu = Array.fold_left (Gsl.Combi.to_array c) ~init:0. 388 | ~f:(fun acc i -> acc +. Array.unsafe_get ranks i) -. 389 | float_of_int (k * (k + 1)) /. 2. 390 | in begin 391 | if cu <= u then incr le; 392 | if cu >= u then incr ge; 393 | Gsl.Combi.next c 394 | end 395 | done; 396 | 397 | let pvalue = match alternative with 398 | | Less -> float_of_int !le /. c_n_k 399 | | Greater -> float_of_int !ge /. c_n_k 400 | | TwoSided -> 2. *. float_of_int (min !le !ge) /. c_n_k 401 | in { test_statistic = u; test_pvalue = pvalue } 402 | end 403 | end 404 | 405 | module WilcoxonT = struct 406 | let two_sample_paired v1 v2 ?(alternative=TwoSided) ?(correction=true) () = 407 | let n = Array.length v1 in 408 | if n = 0 409 | then invalid_arg "WilcoxonT.two_sample_paired: no data"; 410 | if n <> Array.length v2 411 | then invalid_arg "WilcoxonT.two_sample_paired: unequal length arrays"; 412 | 413 | let d = Array.init n ~f:(fun i -> v2.(i) -. v1.(i)) in 414 | let (zeros, non_zeros) = Array.partition ~f:((=) 0.) d in 415 | let nz = float_of_int (Array.length non_zeros) in 416 | let (t, ranks) = Sample.rank non_zeros 417 | ~cmp:(fun d1 d2 -> compare (abs_float d1) (abs_float d2)) in 418 | let w_plus = 419 | Array.fold_left ~f:(+.) ~init:0. 
420 | (Array.mapi non_zeros 421 | ~f:(fun i v -> if v > 0. then ranks.(i) else 0.)) in 422 | let w_minus = nz *. (nz +. 1.) /. 2. -. w_plus in 423 | 424 | (* Following Sheskin, W is computed as a minimum of W+ and W-. *) 425 | let w = min w_plus w_minus in 426 | 427 | if t <> 0. || Array.length zeros <> 0 || n > 20 428 | then 429 | (* Normal approximation. *) 430 | let mean = nz *. (nz +. 1.) /. 4. in 431 | let sd = sqrt (nz *. (nz +. 1.) *. (2. *. nz +. 1.) /. 24. -. 432 | t /. 48.) in 433 | let delta = 434 | if correction 435 | then match alternative with 436 | | Less -> -0.5 437 | | Greater -> 0.5 438 | | TwoSided -> if w > mean then 0.5 else -0.5 439 | else 0. 440 | in 441 | 442 | let z = (w -. mean -. delta) /. sd in 443 | let open Distributions.Normal in 444 | let pvalue = match alternative with 445 | | Less -> cumulative_probability standard ~x:z 446 | | Greater -> 1. -. cumulative_probability standard ~x:z 447 | | TwoSided -> 448 | 2. *. (min (cumulative_probability standard ~x:z) 449 | (1. -. cumulative_probability standard ~x:z)) 450 | in { test_statistic = w; test_pvalue = pvalue } 451 | else 452 | (* Exact critical value. *) 453 | let le = ref 0 in 454 | let ge = ref 0 in 455 | let two_n = float_of_int (2 lsl (int_of_float nz)) in 456 | begin 457 | for i = 0 to int_of_float two_n - 1 do 458 | let pw = ref 0. in 459 | for j = 0 to int_of_float nz - 1 do 460 | if (i lsr j) land 1 = 1 461 | then pw := !pw +. Array.unsafe_get ranks j; 462 | done; 463 | 464 | if !pw <= w then incr le; 465 | if !pw >= w then incr ge; 466 | done; 467 | 468 | let pvalue = match alternative with 469 | | Less -> float_of_int !le /. two_n 470 | | Greater -> float_of_int !ge /. two_n 471 | | TwoSided -> 2. *. float_of_int (min !le !ge) /. two_n 472 | in { test_statistic = w; test_pvalue = pvalue } 473 | end 474 | 475 | let one_sample vs ?(shift=0.) 
= 476 | two_sample_paired (Array.make (Array.length vs) shift) vs 477 | end 478 | 479 | module Sign = struct 480 | let two_sample_paired v1 v2 ?(alternative=TwoSided) () = 481 | let n = Array.length v1 in 482 | if n = 0 483 | then invalid_arg "Sign.two_sample_paired: no data"; 484 | if n <> Array.length v2 485 | then invalid_arg "Sign.two_sample_paired: unequal length arrays"; 486 | 487 | let ds = Array.init n ~f:(fun i -> v1.(i) -. v2.(i)) in 488 | let (pi_plus, pi_minus) = Array.fold_left ds ~init:(0, 0) 489 | ~f:(fun (p, m) d -> 490 | if d > 0. 491 | then (succ p, m) 492 | else if d < 0. then (p, succ m) 493 | else (p, m)) 494 | in 495 | 496 | let open Distributions.Binomial in 497 | let d = create ~trials:(pi_plus + pi_minus) ~p:0.5 in 498 | let pvalue = match alternative with 499 | | Less -> cumulative_probability d ~k:pi_plus 500 | | Greater -> 1. -. cumulative_probability d ~k:(pi_plus - 1) 501 | | TwoSided -> 502 | 2. *. (min (cumulative_probability d ~k:pi_plus) 503 | (1. -. cumulative_probability d ~k:(pi_plus - 1))) 504 | in { test_statistic = float_of_int pi_plus; test_pvalue = min 1. pvalue } 505 | 506 | let one_sample vs ?(shift=0.) = 507 | two_sample_paired vs (Array.make (Array.length vs) shift) 508 | end 509 | 510 | 511 | module Multiple = struct 512 | type adjustment_method = 513 | | HolmBonferroni 514 | | BenjaminiHochberg 515 | 516 | let adjust pvalues how = 517 | let m = Array.length pvalues in 518 | let adjusted_pvalues = Array.make m 0. in 519 | begin match how with 520 | | HolmBonferroni -> 521 | let is = Array.sort_index ~cmp:compare pvalues in 522 | let iu = Array.sort_index ~cmp:compare is in begin 523 | for i = 0 to m - 1 do 524 | let j = Array.unsafe_get is i in 525 | Array.unsafe_set adjusted_pvalues i 526 | (min 1. (float_of_int (m - i) *. 
pvalues.(j))) 527 | done; 528 | 529 | let cm = Base.cumulative ~f:max adjusted_pvalues in 530 | Base.reorder iu ~src:cm ~dst:adjusted_pvalues 531 | end 532 | | BenjaminiHochberg -> 533 | let is = Array.sort_index ~cmp:(fun v1 v2 -> compare v2 v1) pvalues in 534 | let iu = Array.sort_index ~cmp:compare is in begin 535 | for i = 0 to m - 1 do 536 | let j = Array.unsafe_get is i in 537 | Array.unsafe_set adjusted_pvalues i 538 | (min 1. (float_of_int m /. float_of_int (m - i) *. pvalues.(j))) 539 | done; 540 | 541 | let cm = Base.cumulative ~f:min adjusted_pvalues in 542 | Base.reorder iu ~src:cm ~dst:adjusted_pvalues 543 | end 544 | end; adjusted_pvalues 545 | end 546 | -------------------------------------------------------------------------------- /lib/tests.mli: -------------------------------------------------------------------------------- 1 | (** Statistical testing. *) 2 | 3 | type test_alternative = Less | Greater | TwoSided 4 | 5 | type test_result = { 6 | test_statistic : float; 7 | test_pvalue : float 8 | } 9 | 10 | (** Assess significance of the statistical test at a given 11 | [significance_level], which defaults to [0.05]. *) 12 | val run_test 13 | : ?significance_level:float 14 | -> (unit -> test_result) 15 | -> [`Significant | `NotSignificant] 16 | 17 | 18 | module T : sig 19 | (** One sample Student's t-test, which evaluates the null hypothesis 20 | that a [mean] of a normally distributed variable is equal to the 21 | specified value. *) 22 | val one_sample 23 | : float array 24 | -> ?mean:float 25 | -> ?alternative:test_alternative 26 | -> unit 27 | -> test_result 28 | 29 | (** Two sample t-test, which evaluates the null hypothesis that the 30 | difference of means of two {e independent} normally distributed 31 | populations is equal to the specified value. 
*) 32 | val two_sample_independent 33 | : float array 34 | -> float array 35 | -> ?equal_variance:bool 36 | -> ?mean:float 37 | -> ?alternative:test_alternative 38 | -> unit 39 | -> test_result 40 | 41 | (** Paired two sample t-test, which evaluates the null hypothes that 42 | the difference of means of the two {e paired} normally distributed 43 | populations is equal to the specified value. *) 44 | val two_sample_paired 45 | : float array 46 | -> float array 47 | -> ?mean:float 48 | -> ?alternative:test_alternative 49 | -> unit 50 | -> test_result 51 | end 52 | 53 | (** Pearson's chi-squared test. *) 54 | module ChiSquared : sig 55 | val goodness_of_fit 56 | : float array -> ?expected:float array -> ?df:int -> unit -> test_result 57 | 58 | val independence 59 | : float array array -> ?correction:bool -> unit -> test_result 60 | end 61 | 62 | module KolmogorovSmirnov : sig 63 | (** One-sample Kolmogorov-Smirnov test for goodness of fit, which 64 | evaluates the distribution [G(x)] of the observed random variable 65 | against a given distribution [F(x)]. Under the null hypothesis 66 | the two distributions are identical, [G(x) = F(x)]. *) 67 | val goodness_of_fit 68 | : float array 69 | -> cumulative_probability:(float -> float) 70 | -> ?alternative:test_alternative 71 | -> unit 72 | -> test_result 73 | 74 | (** Two-sample Kolmogorov-Smirnov test, which evaluates the null 75 | hypothesis, that two {e independent} samples are drawn from the 76 | same continious distribution. 77 | 78 | {b Note}: in the current implementation samples with ties will 79 | result in an [Invalid_argument] exception. *) 80 | val two_sample 81 | : float array 82 | -> float array 83 | -> ?alternative:test_alternative 84 | -> unit 85 | -> test_result 86 | 87 | (** {6 References} 88 | 89 | + National Institute of Standards and Technology (US), et al. 90 | "Engineering statistics handbook", Section 1.3.5.16. 91 | The Institute, 2001. 92 | + Jingbo Wang, Wai Wan Tsang, and George Marsaglia. 
93 | "Evaluating Kolmogorov's distribution." Journal of 94 | Statistical Software 8, no. 18. 2003. 95 | + Z. W. Birnbaum, Fred H. Tingey. "One-sided confidence contours 96 | for probability distribution functions." The Annals of 97 | Mathematical Statistics, pp592-596. 1951. *) 98 | end 99 | 100 | module MannWhitneyU : sig 101 | (** Mann-Whitney U test (also known as Mann-Whitney-Wilcoxon test and 102 | Wilcoxon rank sum test) is a non-paramteric test, which evaluates 103 | the null hypothesis that two {e independent} samples have equal 104 | medians. *) 105 | val two_sample_independent 106 | : 'a array 107 | -> 'a array 108 | -> ?alternative:test_alternative 109 | -> ?correction:bool 110 | -> unit 111 | -> test_result 112 | 113 | (** {6 References} 114 | 115 | + Gravetter, Frederick J. and Larry B. Wallnau. 116 | "Statistics for the behavioral sciences". Wadsworth Publishing 117 | Company, 2006. 118 | + David J. Sheskin. "Handbook of Parametric and Nonparametric 119 | Statistical Procedures", 3rd edition. CRC Press, 2003. *) 120 | end 121 | 122 | module WilcoxonT : sig 123 | (** Wilcoxon signed-rank test, which evaluates the null hypothesis 124 | that sample median is equal to the specified [shift]. 125 | 126 | Test assumptions: 127 | 128 | + Sample under test was randomly selected from the population it 129 | represents. 130 | + All [vs -. shift] differences are iid and come from a continious 131 | population. *) 132 | val one_sample 133 | : float array 134 | -> ?shift:float 135 | -> ?alternative:test_alternative 136 | -> ?correction:bool 137 | -> unit 138 | -> test_result 139 | 140 | (** Wilcoxon paired signed-rank test, which evaluates the null hypothesis 141 | that two {e related} samples have equal medians. 142 | 143 | Test assumptions: 144 | 145 | + Samples under test were randomly selected from the population 146 | they represent. 147 | + Observation differences [vs2 -. vs1] are iid and come from a 148 | continious population. 
*) 149 | val two_sample_paired 150 | : float array 151 | -> float array 152 | -> ?alternative:test_alternative 153 | -> ?correction:bool 154 | -> unit 155 | -> test_result 156 | 157 | (** {6 References} 158 | 159 | + David J. Sheskin. "Handbook of Parametric and Nonparametric 160 | Statistical Procedures", 3rd edition. CRC Press, 2003. 161 | + http://www.fon.hum.uva.nl/Service/Statistics/Signed_Rank_Algorihms.html *) 162 | end 163 | 164 | module Sign : sig 165 | (** Sign test, which evaluates the null hypothesis that sample median is 166 | equal to the specified [shift]. 167 | 168 | Test assumptions: 169 | 170 | + Sample under test was randomly selected from the population it 171 | represents. *) 172 | val one_sample 173 | : float array 174 | -> ?shift:float 175 | -> ?alternative:test_alternative 176 | -> unit 177 | -> test_result 178 | 179 | (** Dependent samples sign test, which evaluates the null hypothesis 180 | that the median difference between observations from two {e related} 181 | samples is zero. 182 | 183 | Test assumptions: 184 | 185 | + Samples under test were randomly selected from the population they 186 | represent. *) 187 | val two_sample_paired 188 | : float array 189 | -> float array 190 | -> ?alternative:test_alternative 191 | -> unit 192 | -> test_result 193 | end 194 | 195 | 196 | (** Adjustments for multiple comparisons. *) 197 | module Multiple : sig 198 | type adjustment_method = 199 | | HolmBonferroni 200 | | BenjaminiHochberg 201 | 202 | (** Adjusts obtained P-values for multiple comparisons using a given 203 | adjustment method. *) 204 | val adjust : float array -> adjustment_method -> float array 205 | 206 | (** {6 References} 207 | 208 | + Yoav Benjamini and Yosef Hochberg. "Controlling the false discovery 209 | rate: a practical and powerful approach to multiple testing.", 210 | Journal of the Royal Statistical Society, Series B (Methodological), 211 | pp289-300, 1995. 
*) 212 | end 213 | -------------------------------------------------------------------------------- /lib_test/common.ml: -------------------------------------------------------------------------------- 1 | open OUnit 2 | 3 | let assert_almost_equal ?(epsilon=1e-6) ?msg x y = 4 | (* Note(superbobry): original version can't handle infinities properly. 5 | See #1381 in oUnit bug tracker. *) 6 | if not (x = infinity || y = infinity || 7 | x = neg_infinity || y = neg_infinity) 8 | then assert_equal 9 | ?msg 10 | ~cmp:(cmp_float ~epsilon) 11 | ~printer:(Printf.sprintf "%.10f") x y 12 | 13 | let cmp_array ~cmp v1 v2 = 14 | let n = min (Array.length v1) (Array.length v2) 15 | and res = ref true in 16 | for i = 0 to n - 1 do 17 | res := !res && cmp (Array.unsafe_get v1 i) (Array.unsafe_get v2 i) 18 | done; !res 19 | 20 | let printer_array ~printer vs = 21 | let inner = String.concat ", " Array.(to_list (map printer vs)) in 22 | Printf.sprintf "[%s]" inner 23 | -------------------------------------------------------------------------------- /lib_test/distributions_test.ml: -------------------------------------------------------------------------------- 1 | open OUnit 2 | 3 | open Pareto.Distributions 4 | 5 | open Common 6 | 7 | 8 | let test_log_density () = 9 | let go ~msg 10 | (type t) 11 | (module D : ContinuousDistribution with type t = t and type elt = float) 12 | (d : t) = 13 | for i = 0 to 100 do 14 | let x = Uniform.(random (create ~lower:0. ~upper:42.)) in 15 | assert_almost_equal ~msg (log (D.density d ~x)) (D.log_density d ~x) 16 | done 17 | in begin 18 | go "Normal" (module Normal) (normal ~mean:4. ~sd:2.); 19 | go "LogNormal" (module LogNormal) (log_normal ~mean:4. ~sd:2.); 20 | go "Uniform" (module Uniform) (uniform ~lower:2. 
~upper:4.); 21 | go "Exponential" (module Exponential) (exponential ~scale:42.); 22 | go "ChiSquared" (module ChiSquared) (chi_squared ~df:42); 23 | go "F" (module F) (f ~df1:4 ~df2:2); 24 | go "T" (module T) (t ~df:42.); 25 | go "Gamma" (module Gamma) (gamma ~shape:4. ~scale:2.); 26 | go "Cauchy" (module Cauchy) (cauchy ~location:4. ~scale:2.); 27 | go "Beta" (module Beta) (beta ~alpha:4. ~beta:2.); 28 | go "Logistic" (module Logistic) (logistic ~location:4. ~scale:2.) 29 | end 30 | 31 | and test_log_probability () = 32 | let go msg 33 | (type t) 34 | (module D : DiscreteDistribution with type t = t and type elt = int) 35 | (d : t) = 36 | for i = 0 to 100 do 37 | let k = int_of_float (Uniform.(random (create ~lower:0. ~upper:42.))) in 38 | assert_almost_equal ~msg:msg 39 | (log (D.probability d ~k)) (D.log_probability d ~k) 40 | done 41 | in begin 42 | go "Poisson" (module Poisson) (poisson ~rate:42.); 43 | go "Bernoulli" (module Bernoulli) (bernoulli ~p:0.42); 44 | go "Binomial" (module Binomial) (binomial ~trials:42 ~p:0.42); 45 | go "Geometric" (module Geometric) (geometric ~p:0.42); 46 | go "Hypergeometric" (module Hypergeometric) 47 | (hypergeometric ~m:4 ~t:42 ~k:2); 48 | go "NegativeBinomial" (module NegativeBinomial) 49 | (negative_binomial ~failures:42. 
~p:0.42); 50 | end 51 | 52 | 53 | let test_categorical () = 54 | let module Strings = Categorical.Make(String) in 55 | let d = 56 | Strings.create [|("C", 0.3); ("A", 0.1); ("G", 0.5); ("T", 0.1)|] 57 | in begin 58 | assert_almost_equal ~msg:"Pr(X = A)" 0.1 (Strings.probability d "A"); 59 | assert_almost_equal ~msg:"Pr(X = C)" 0.3 (Strings.probability d "C"); 60 | assert_almost_equal ~msg:"Pr(X = G)" 0.5 (Strings.probability d "G"); 61 | assert_almost_equal ~msg:"Pr(X = T)" 0.1 (Strings.probability d "T"); 62 | 63 | assert_almost_equal ~msg:"Pr(X <= A)" 64 | (Strings.cumulative_probability d "A") 0.1; 65 | assert_almost_equal ~msg:"Pr(X <= T)" 66 | (Strings.cumulative_probability d "T") 1.0; 67 | 68 | assert_almost_equal ~msg:"Pr(X <= $)" 69 | (Strings.cumulative_probability d "$") 0. 70 | end 71 | 72 | 73 | let test_gamma_mle () = 74 | let shape = Uniform.(random (create ~lower:0. ~upper:42.)) 75 | and scale = Uniform.(random (create ~lower:0. ~upper:42.)) in 76 | let open Gamma in 77 | let vs = sample ~size:(1 lsl 16) (create ~shape ~scale) in 78 | let { gamma_shape; gamma_scale } = mle ~n_iter:100 ~epsilon:1e-6 vs in 79 | begin 80 | assert_almost_equal ~msg:"shape" ~epsilon:0.01 shape gamma_shape; 81 | assert_almost_equal ~msg:"scale" ~epsilon:0.01 scale gamma_scale 82 | end 83 | 84 | 85 | let test = "Distributions" >::: [ 86 | "categorical" >:: test_categorical; 87 | "density vs. log_density" >:: test_log_density; 88 | "probability vs. log_probability" >:: test_log_probability; 89 | 90 | "Gamma MLE" >:: test_gamma_mle; 91 | ] 92 | -------------------------------------------------------------------------------- /lib_test/sample_test.ml: -------------------------------------------------------------------------------- 1 | open OUnit 2 | 3 | open Pareto.Sample 4 | 5 | open Common 6 | 7 | 8 | let test_summary ~size () = 9 | let vs = 10 | let open Pareto.Distributions.Uniform in 11 | sample ~size (create ~lower:(-42.) ~upper:42.) 
12 | in 13 | 14 | let s = Array.fold_left Summary.add Summary.empty vs in begin 15 | assert_almost_equal ~msg:"min" (Summary.min s) (min vs); 16 | assert_almost_equal ~msg:"max" (Summary.max s) (max vs); 17 | assert_equal ~msg:"size" (Summary.size s) (Array.length vs); 18 | assert_almost_equal ~msg:"mean" (Summary.mean s) (mean vs); 19 | assert_almost_equal ~msg:"variance" (Summary.variance s) (variance vs); 20 | assert_almost_equal ~msg:"sd" (Summary.sd s) (sd vs) 21 | end 22 | 23 | and test_combined_summary ~size () = 24 | let vs = 25 | let open Pareto.Distributions.Uniform in 26 | sample ~size (create ~lower:(-42.) ~upper:42.) 27 | in 28 | 29 | let mid = size / 2 in 30 | let v1 = Array.sub vs 0 mid 31 | and v2 = Array.sub vs mid (size - mid) in 32 | let s1 = Array.fold_left Summary.add Summary.empty v1 33 | and s2 = Array.fold_left Summary.add Summary.empty v2 in 34 | let s12 = Summary.combine s1 s2 35 | and s = Array.fold_left Summary.add Summary.empty vs in begin 36 | assert_almost_equal ~msg:"min" (Summary.min s) (Summary.min s12); 37 | assert_almost_equal ~msg:"max" (Summary.max s) (Summary.max s12); 38 | assert_equal ~msg:"size" (Summary.size s) (Summary.size s12); 39 | assert_almost_equal ~msg:"mean" (Summary.mean s) (Summary.mean s12); 40 | assert_almost_equal ~msg:"variance" 41 | (Summary.variance s) (Summary.variance s12); 42 | assert_almost_equal ~msg:"sd" (Summary.sd s) (Summary.sd s12); 43 | assert_almost_equal ~msg:"skewness" 44 | (Summary.skewness s) (Summary.skewness s12); 45 | assert_almost_equal ~msg:"kurtosis" 46 | (Summary.kurtosis s) (Summary.kurtosis s12); 47 | end 48 | 49 | and test_quantile () = 50 | let rec vs = 51 | [|0.952363286988083; 0.829666168783014; 0.563616484350936; 52 | 0.386334933107061; 0.0833841367636058; 0.99997428768617; 53 | 0.374802467851785; 0.349201461890657; 0.89384498325876; 54 | 0.431750792813907|] 55 | and go ~param ~msg expected = 56 | assert_equal 57 | ~msg ~cmp:(cmp_array ~cmp:cmp_float) 58 | 
~printer:(printer_array ~printer:(Printf.sprintf "%.6f")) 59 | (Quantile.continuous_by ~param ~ps:[|0.; 0.25; 0.5; 0.75; 1.|] vs) 60 | expected 61 | in begin 62 | go ~param:Quantile.CADPW ~msg:"type 4" [| 63 | 0.0833841367636058; 0.362001964871221; 0.431750792813907; 64 | 0.861755576020887; 0.99997428768617 65 | |]; 66 | go ~param:Quantile.Hazen ~msg:"type 5" [| 67 | 0.0833841367636058; 0.374802467851785; 0.497683638582421; 68 | 0.89384498325876; 0.99997428768617 69 | |]; 70 | go ~param:Quantile.SPSS ~msg:"type 6" [| 71 | 0.0833841367636058; 0.368402216361503; 0.497683638582421; 72 | 0.908474559191091; 0.99997428768617 73 | |]; 74 | go ~param:Quantile.S ~msg:"type 7" [| 75 | 0.0833841367636058; 0.377685584165604; 0.497683638582421; 76 | 0.877800279639823; 0.99997428768617 77 | |]; 78 | go ~param:Quantile.MedianUnbiased ~msg:"type 8" [| 79 | 0.0833841367636058; 0.372669050688358; 0.497683638582421; 80 | 0.898721508569537; 0.99997428768617 81 | |]; 82 | go ~param:Quantile.NormalUnbiased ~msg:"type 9" [| 83 | 0.0833841367636058; 0.373202404979215; 0.497683638582421; 84 | 0.897502377241843; 0.99997428768617 85 | |] 86 | end 87 | 88 | and test_rank () = 89 | let vs = [|4.; 19.; 11.; 18.; 2.; 12.; 13.; 16.; 0.; 2.; 90 | 2.; 7.; 1.; 17.; 16.; 19.; 11.; 12.; 19.; 4.|] 91 | in begin 92 | assert_equal 93 | ~msg:"average" ~cmp:(cmp_array ~cmp:cmp_float) 94 | ~printer:(printer_array ~printer:(Printf.sprintf "%.6f")) 95 | (snd (rank ~ties_strategy:`Average vs)) 96 | [|6.5; 19.; 9.5; 17.; 4.; 11.5; 13.; 14.5; 1.; 4.; 4.; 97 | 8.; 2.; 16.; 14.5; 19.; 9.5; 11.5; 19.; 6.5|]; 98 | assert_equal 99 | ~msg:"min" ~cmp:(cmp_array ~cmp:cmp_float) 100 | ~printer:(printer_array ~printer:(Printf.sprintf "%.6f")) 101 | (snd (rank ~ties_strategy:`Min vs)) 102 | [|6.; 18.; 9.; 17.; 3.; 11.; 13.; 14.; 1.; 3.; 3.; 8.; 103 | 2.; 16.; 14.; 18.; 9.; 11.; 18.; 6.|]; 104 | assert_equal 105 | ~msg:"max" ~cmp:(cmp_array ~cmp:cmp_float) 106 | ~printer:(printer_array ~printer:(Printf.sprintf "%.6f")) 107 
| (snd (rank ~ties_strategy:`Max vs)) 108 | [|7.; 20.; 10.; 17.; 5.; 12.; 13.; 15.; 1.; 5.; 5.; 8.; 109 | 2.; 16.; 15.; 20.; 10.; 12.; 20.; 7.|] 110 | end 111 | 112 | and test_correlation () = 113 | let v1 = [|0.0172824052698839; -0.454504077892448; 1.25946495815052; 114 | 1.34046889912174; 0.609915320636686; -0.217651830293509; 115 | 0.991287334206914; -0.186392670591343; 0.0266357683474309; 116 | -1.45310338401619|] 117 | and v2 = [|0.505340435184033; -1.42575172974959; -0.521196733941402; 118 | -0.0185022933706756; 0.17230654109602; 1.67872553102743; 119 | 0.480586104798118; 0.910431368258919; 0.373583673677502; 120 | -0.3494655448979|] 121 | in begin 122 | assert_almost_equal ~msg:"Pearson product-moment correlation" 123 | (Correlation.pearson v1 v2) 0.02894452; 124 | assert_almost_equal ~msg:"Spearman rank correlation" 125 | (Correlation.spearman v1 v2) (-0.07878788); 126 | 127 | assert_equal 128 | ~msg:"Pearson product-moment self-correlation" 129 | ~cmp:(cmp_array ~cmp:cmp_float) 130 | ~printer:(printer_array ~printer:(Printf.sprintf "%.6f")) 131 | (Correlation.Auto.pearson v1) 132 | [|1.; 0.0975335049156232; 0.00331984882534066; -0.20666268433441; 133 | 0.179634622194576; -0.255208927944468; -0.289850963695003; 134 | -0.233617717921346; 0.162004926236094; 0.0428473917235931|] 135 | end 136 | 137 | and test_moments () = 138 | let vs = 139 | let open Pareto.Distributions.Uniform in 140 | sample ~size:1024 (create ~lower:(-42.) ~upper:42.) 141 | in 142 | 143 | let k = 8 in 144 | let ms = Pareto.Sample.moments k vs in 145 | for p = 1 to k do 146 | assert_almost_equal ~msg:(string_of_int p) 147 | (Array.fold_left (+.) 0. 148 | (Array.map (fun v -> v ** float_of_int p) vs) /. 1024.) 
149 | ms.(p - 1) 150 | done 151 | 152 | 153 | let test = "Sample" >::: [ 154 | "summary statistics, n = 100" >:: test_summary ~size:100; 155 | "summary statistics, n = 1000" >:: test_summary ~size:1000; 156 | "summary statistics, n = 10000" >:: test_summary ~size:10000; 157 | "combined summary, n = 100" >:: test_combined_summary ~size:100; 158 | "combined summary, n = 1000" >:: test_combined_summary ~size:1000; 159 | "combined summary, n = 10000" >:: test_combined_summary ~size:10000; 160 | "quantile" >:: test_quantile; 161 | "rank" >:: test_rank; 162 | "correlation" >:: test_correlation; 163 | "moments" >:: test_moments 164 | ] 165 | -------------------------------------------------------------------------------- /lib_test/test.ml: -------------------------------------------------------------------------------- 1 | open OUnit 2 | 3 | let all () = TestList [ 4 | Tests_test.test; 5 | Sample_test.test; 6 | Distributions_test.test 7 | ] 8 | -------------------------------------------------------------------------------- /lib_test/test_runner.ml: -------------------------------------------------------------------------------- 1 | let () = 2 | let open OUnit in 3 | ignore (run_test_tt_main (Test.all ())) 4 | -------------------------------------------------------------------------------- /lib_test/tests_test.ml: -------------------------------------------------------------------------------- 1 | open OUnit 2 | 3 | open Pareto.Tests 4 | 5 | open Common 6 | 7 | 8 | let assert_equal_test_result ~msg tr1 tr2 = 9 | assert_almost_equal ~msg tr1.test_statistic tr2.test_statistic; 10 | assert_almost_equal ~msg tr1.test_pvalue tr2.test_pvalue 11 | 12 | let assert_equal_test_results ?(msg="") 13 | (f : ?alternative:test_alternative -> unit -> test_result) 14 | expected = 15 | List.iter2 (fun tr1 alternative -> 16 | let tr2 = f ~alternative () in 17 | let direction = match alternative with 18 | | TwoSided -> "two-sided" 19 | | Less -> "less" 20 | | Greater -> "greater" 21 | in 
assert_equal_test_result 22 | ~msg:(if msg = "" 23 | then direction 24 | else Printf.sprintf "%s, %s" msg direction) 25 | tr1 tr2 26 | ) expected [TwoSided; Less; Greater] 27 | 28 | 29 | let t_test_one_sample () = 30 | let vs = [|0.88456; 0.43590; 0.95778; -1.05039; -0.38589; 31 | -0.06342; -0.18712; 1.58856; 0.86964; 1.22192|] 32 | in begin 33 | assert_equal_test_results (T.one_sample vs ~mean:0.) [ 34 | { test_statistic = 1.6368118; test_pvalue = 0.1360967 }; 35 | { test_statistic = 1.6368118; test_pvalue = 0.931951 }; 36 | { test_statistic = 1.6368118; test_pvalue = 0.0680483 } 37 | ]; 38 | 39 | (* Zero division issues. *) 40 | let tr = 41 | T.one_sample [|0.; 0.; 0.; 0.|] ~mean:0. ~alternative:TwoSided () 42 | in begin 43 | assert_bool "t-statistic or t-test P-value is not nan" 44 | (tr.test_statistic <> tr.test_statistic && 45 | tr.test_pvalue <> tr.test_pvalue) 46 | end 47 | end 48 | 49 | and t_test_two_sample_independent () = 50 | let v1 = [|-0.86349; 0.36688; -0.48266; 0.53237; -0.87635; 51 | -1.28357; -1.46325; 0.21937; -0.38159; -0.22752|] 52 | and v2 = [|-0.20951; 1.27388; 0.27331; 1.85599; -1.09702; 53 | -0.20033; -0.45065; 0.06710; -0.18932; 1.60007|] 54 | in begin 55 | assert_equal_test_results ~msg:"unequal variance" 56 | (T.two_sample_independent v1 v2 ~mean:0.42 ~equal_variance:false) 57 | [{ test_statistic = -3.097208; test_pvalue = 0.00683211 }; 58 | { test_statistic = -3.097208; test_pvalue = 0.003416056 }; 59 | { test_statistic = -3.097208; test_pvalue = 0.996583 }]; 60 | assert_equal_test_results ~msg:"equal variance" 61 | (T.two_sample_independent v1 v2 ~mean:0.24 ~equal_variance:true) 62 | [{ test_statistic = -2.615915; test_pvalue = 0.01750332 }; 63 | { test_statistic = -2.615915; test_pvalue = 0.00875166 }; 64 | { test_statistic = -2.615915; test_pvalue = 0.991248 }] 65 | end 66 | 67 | and t_test_two_sample_paired () = 68 | let v1 = [|-0.86349; 0.36688; -0.48266; 0.53237; -0.87635; 69 | -1.28357; -1.46325; 0.21937; -0.38159; -0.22752|] 70 
| and v2 = [|-0.20951; 1.27388; 0.27331; 1.85599; -1.09702; 71 | -0.20033; -0.45065; 0.06710; -0.18932; 1.60007|] 72 | in begin 73 | assert_equal_test_results 74 | (T.two_sample_paired v1 v2 ~mean:0.) 75 | [{ test_statistic = -3.607401; test_pvalue = 0.0056823 }; 76 | { test_statistic = -3.607401; test_pvalue = 0.00284115 }; 77 | { test_statistic = -3.607401; test_pvalue = 0.997158 }]; 78 | end 79 | 80 | and chisq_test_gof () = 81 | let observed = [|42.; 24.; 10.; 10.|] 82 | and expected = [|21.; 22.; 17.; 26.|] 83 | in begin 84 | assert_equal_test_result ~msg:"with uniform probabilities" 85 | (ChiSquared.goodness_of_fit observed ()) 86 | { test_statistic = 32.139534; test_pvalue = 4.8908077607843233e-07 }; 87 | 88 | (* Note(superbobry): R doesn't have a version of the test with 89 | frequences, thus we use 'scipy.stats.chisquare' as a reference. *) 90 | assert_equal_test_result ~msg:"with given probabilities" 91 | (ChiSquared.goodness_of_fit observed ~expected ()) 92 | { test_statistic = 33.910324; test_pvalue = 2.06945476533e-07 } 93 | end 94 | 95 | and chisq_test_independence () = 96 | let observed = [| 97 | [|4.; 3.; 5.; 3.; 5.; 3.; 2.; 5.; 4.; 4.; 4.; 3.|]; 98 | [|2.; 2.; 1.; 2.; 3.; 1.; 2.; 3.; 2.; 1.; 1.; 3.|]; 99 | [|2.; 4.; 3.; 3.; 4.; 3.; 3.; 4.; 4.; 1.; 2.; 1.|]; 100 | [|3.; 5.; 4.; 3.; 4.; 4.; 3.; 3.; 3.; 4.; 4.; 4.|] 101 | |] in begin 102 | assert_equal_test_result ~msg:"with continuity correction" 103 | (ChiSquared.independence observed ~correction:true ()) 104 | { test_statistic = 9.153073; test_pvalue = 0.999987 }; 105 | end 106 | 107 | and ks_test_gof () = 108 | let vs = [|-1.52455; 0.79745; 0.76526; -2.32246; 0.15411; 109 | -1.36430; 0.62041; 1.17614; 1.09825; -0.17400|] 110 | in begin 111 | let open Pareto.Distributions.Normal in 112 | assert_equal_test_results ~msg:"standard normal" 113 | (KolmogorovSmirnov.goodness_of_fit vs 114 | ~cumulative_probability:(fun x -> cumulative_probability standard ~x)) 115 | [{ test_statistic = 0.232506; 
test_pvalue = 0.575175 }; 116 | { test_statistic = 0.232506; test_pvalue = 0.293906 }; 117 | { test_statistic = 0.2137634; test_pvalue = 0.3516124 }] 118 | end 119 | 120 | and ks_test_two_sample () = 121 | let v1 = [|0.60074; 1.93516; 0.62419; -0.40251; -0.14719; 122 | -0.05324; -0.95052; 1.84247; -0.58041; -0.75201|] 123 | and v2 = [|-0.33866; -0.59032; -0.12525; -0.81013; -0.60733; 124 | 0.18550; 1.01396; 0.17067; -0.74872; 1.03694|] 125 | in begin 126 | assert_equal_test_results 127 | (KolmogorovSmirnov.two_sample v1 v2) 128 | [{ test_statistic = 0.2; test_pvalue = 0.994457 }; 129 | { test_statistic = 0.2; test_pvalue = 0.670320 }; 130 | { test_statistic = 0.1; test_pvalue = 0.904837 }] 131 | end 132 | 133 | and mann_whitney_test_two_sample () = 134 | (* Source: 'scipy/stats/tests/test_stats.py'. *) 135 | let v1 = [|1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 136 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 137 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 138 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 139 | 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 140 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 141 | 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 142 | 1.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 2.; 1.; 1.; 143 | 2.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 144 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 145 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 146 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 147 | 2.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 148 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 3.; 1.; 1.; 149 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 150 | 1.; 1.; 1.; 1.; 1.; 1.; 1.|] 151 | and v2 = [|1.; 1.; 1.; 1.; 1.; 1.; 1.; 2.; 1.; 2.; 1.; 1.; 1.; 152 | 1.; 2.; 1.; 1.; 1.; 2.; 
1.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 3.; 1.; 153 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 2.; 1.; 2.; 1.; 1.; 1.; 1.; 154 | 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 155 | 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 2.; 2.; 1.; 1.; 2.; 1.; 1.; 2.; 156 | 1.; 2.; 1.; 1.; 1.; 1.; 2.; 2.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 157 | 1.; 1.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 2.; 2.; 2.; 1.; 158 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 159 | 1.; 2.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 1.; 160 | 1.; 1.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 161 | 1.|] 162 | in begin 163 | assert_equal_test_results ~msg:"with continuity correction and ties" 164 | (MannWhitneyU.two_sample_independent v1 v2 ~correction:true) 165 | [{ test_statistic = 16980.5; test_pvalue = 0.0000564287 }; 166 | { test_statistic = 16980.5; test_pvalue = 0.0000282143 }; 167 | { test_statistic = 16980.5; test_pvalue = 0.999972 }]; 168 | 169 | assert_equal_test_results ~msg:"with continuity correction and no ties" 170 | (MannWhitneyU.two_sample_independent 171 | [|11; 4; 2; 5|] [|12; 3; 9; 7|] ~correction:true) 172 | [{ test_statistic = 5.; test_pvalue = 0.4857143 }; 173 | { test_statistic = 5.; test_pvalue = 0.2428571 }; 174 | { test_statistic = 5.; test_pvalue = 0.8285714 }] 175 | end 176 | 177 | and wilcoxon_signed_rank_test_one_sample () = 178 | begin 179 | (* Source: Sheskin, Example 6.1. 180 | 181 | Note(superbobry): we don't cross-check statistic value with R 182 | here, because R computes W = W-, while our implementation 183 | follows Sheskin and defines W = min(W-, W+). I guess that's 184 | why one-tailed P-values are flipped. *) 185 | assert_equal_test_results ~msg:"with continuity correction and ties" 186 | (WilcoxonT.one_sample [|9.; 10.; 8.; 4.; 8.; 3.; 0.; 10.; 15.; 9.|] 187 | ~shift:5. 
~correction:true) 188 | [{ test_statistic = 11.; test_pvalue = 0.101575647 }; 189 | { test_statistic = 11.; test_pvalue = 0.0507878 }; 190 | { test_statistic = 11.; test_pvalue = 0.959035 }]; 191 | 192 | assert_equal_test_results ~msg:"with continuity correction and no ties" 193 | (WilcoxonT.one_sample [|9.; 10.; 8.; 4.; 3.; 15.|] 194 | ~shift:5. ~correction:true) 195 | [{ test_statistic = 3.; test_pvalue = 0.15625 }; 196 | { test_statistic = 3.; test_pvalue = 0.078125 }; 197 | { test_statistic = 3.; test_pvalue = 0.953125 }] 198 | end 199 | 200 | and wilcoxon_signed_rank_test_two_samples () = 201 | let v1 = [|78.; 24.; 64.; 45.; 64.; 52.; 30.; 50.; 64.; 50.; 202 | 78.; 22.; 84.; 40.; 90.; 72.|] 203 | and v2 = [|78.; 24.; 62.; 48.; 68.; 56.; 25.; 44.; 56.; 40.; 204 | 68.; 36.; 68.; 20.; 58.; 32.|] 205 | in begin 206 | assert_equal_test_results ~msg:"with continuity correction and ties" 207 | (WilcoxonT.two_sample_paired v1 v2 ~correction:true) 208 | [{ test_statistic = 19.; test_pvalue = 0.0382053143 }; 209 | { test_statistic = 19.; test_pvalue = 0.01910266 }; 210 | { test_statistic = 19.; test_pvalue = 0.983638 }] 211 | end 212 | 213 | and sign_test_one_sample () = 214 | (* Source: Sheskin, Example 9.7. *) 215 | let vs = 216 | [|230.; 167.; 250.; 345.; 442.; 190.; 200.; 248.; 289.; 262.; 301.|] 217 | in begin 218 | assert_equal_test_results 219 | (Sign.one_sample vs ~shift:200.) 220 | [{ test_statistic = 8.; test_pvalue = 0.109375 }; 221 | { test_statistic = 8.; test_pvalue = 0.9892578 }; 222 | { test_statistic = 8.; test_pvalue = 0.0546875 }] 223 | end 224 | 225 | and sign_test_two_sample () = 226 | (* Source: Sheskin, Example 19.1. 
*) 227 | let v1 = [|9.; 2.; 1.; 4.; 6.; 4.; 7.; 8.; 5.; 1.|] 228 | and v2 = [|8.; 2.; 3.; 2.; 3.; 0.; 4.; 5.; 4.; 0.|] 229 | in begin 230 | assert_equal_test_results 231 | (Sign.two_sample_paired v1 v2) 232 | [{ test_statistic = 8.; test_pvalue = 0.0390625 }; 233 | { test_statistic = 8.; test_pvalue = 0.9980468 }; 234 | { test_statistic = 8.; test_pvalue = 0.01953125 }] 235 | end 236 | 237 | 238 | let test_hb_adjust () = 239 | let pvalues = 240 | [|0.000962882346117542; 0.00189844480724466; 0.0183097438104205; 241 | 0.0315318359604176; 0.0481693657349631; 0.105687877464594; 242 | 0.543211136961355; 0.565056666152251; 0.603476808731503; 243 | 0.955690764788587|] 244 | and adjusted_pvalues = 245 | [|0.00962882346117542; 0.0170860032652019; 0.146477950483364; 246 | 0.220722851722923; 0.289016194409779; 0.528439387322972; 247 | 1.; 1.; 1.; 1.|] 248 | in begin 249 | assert_equal 250 | ~cmp:(cmp_array ~cmp:(cmp_float ~epsilon:1e-6)) 251 | ~printer:(printer_array ~printer:(Printf.sprintf "%.6f")) 252 | adjusted_pvalues 253 | Multiple.(adjust pvalues HolmBonferroni) 254 | end 255 | 256 | and test_bh_adjust () = 257 | let pvalues = 258 | [|0.000962882346117542; 0.00189844480724466; 0.0183097438104205; 259 | 0.0315318359604176; 0.0481693657349631; 0.105687877464594; 260 | 0.543211136961355; 0.565056666152251; 0.603476808731503; 261 | 0.955690764788587|] 262 | and adjusted_pvalues = 263 | [|0.00949222403622329; 0.00949222403622329; 0.0610324793680683; 264 | 0.078829589901044; 0.0963387314699263; 0.176146462440991; 265 | 0.670529787479448; 0.670529787479448; 0.670529787479448; 266 | 0.955690764788587|] 267 | in begin 268 | assert_equal 269 | ~cmp:(cmp_array ~cmp:(cmp_float ~epsilon:1e-6)) 270 | ~printer:(printer_array ~printer:(Printf.sprintf "%.6f")) 271 | adjusted_pvalues 272 | Multiple.(adjust pvalues BenjaminiHochberg) 273 | end 274 | 275 | 276 | let test = "Tests" >::: [ 277 | "one-sample t-test" >:: t_test_one_sample; 278 | "two-sample t-test for independent samples" >:: 
279 | t_test_two_sample_independent; 280 | "two-sample t-test for paired samples" >:: t_test_two_sample_paired; 281 | "X^2 test for goodness of fit" >:: chisq_test_gof; 282 | "X^2 test for independence" >:: chisq_test_independence; 283 | "one-sample Kolmogorov-Smirnov test for goodness of fit" >:: ks_test_gof; 284 | "two-sample Kolmogorov-Smirnov test" >:: ks_test_two_sample; 285 | "two-sample Mann-Whitney test for independent samples" >:: 286 | mann_whitney_test_two_sample; 287 | "one-sample Wilcoxon signed-rank test" >:: 288 | wilcoxon_signed_rank_test_one_sample; 289 | "two-sample Wilcoxon signed-rank test for paired samples" >:: 290 | wilcoxon_signed_rank_test_two_samples; 291 | "one-sample sign test" >:: sign_test_one_sample; 292 | "two-sample sign test" >:: sign_test_two_sample; 293 | 294 | "Holm-Bonferroni P-value adjustment" >:: test_hb_adjust; 295 | "Benjamini-Hochberg P-value adjustment" >:: test_bh_adjust 296 | ] 297 | -------------------------------------------------------------------------------- /myocamlbuild.ml: -------------------------------------------------------------------------------- 1 | (* OASIS_START *) 2 | (* DO NOT EDIT (digest: ee450cf06c8efebd990f1826664f8a22) *) 3 | module OASISGettext = struct 4 | (* # 22 "src/oasis/OASISGettext.ml" *) 5 | 6 | 7 | let ns_ str = 8 | str 9 | 10 | 11 | let s_ str = 12 | str 13 | 14 | 15 | let f_ (str: ('a, 'b, 'c, 'd) format4) = 16 | str 17 | 18 | 19 | let fn_ fmt1 fmt2 n = 20 | if n = 1 then 21 | fmt1^^"" 22 | else 23 | fmt2^^"" 24 | 25 | 26 | let init = 27 | [] 28 | 29 | 30 | end 31 | 32 | module OASISExpr = struct 33 | (* # 22 "src/oasis/OASISExpr.ml" *) 34 | 35 | 36 | 37 | 38 | 39 | open OASISGettext 40 | 41 | 42 | type test = string 43 | 44 | 45 | type flag = string 46 | 47 | 48 | type t = 49 | | EBool of bool 50 | | ENot of t 51 | | EAnd of t * t 52 | | EOr of t * t 53 | | EFlag of flag 54 | | ETest of test * string 55 | 56 | 57 | 58 | type 'a choices = (t * 'a) list 59 | 60 | 61 | let eval var_get 
t = 62 | let rec eval' = 63 | function 64 | | EBool b -> 65 | b 66 | 67 | | ENot e -> 68 | not (eval' e) 69 | 70 | | EAnd (e1, e2) -> 71 | (eval' e1) && (eval' e2) 72 | 73 | | EOr (e1, e2) -> 74 | (eval' e1) || (eval' e2) 75 | 76 | | EFlag nm -> 77 | let v = 78 | var_get nm 79 | in 80 | assert(v = "true" || v = "false"); 81 | (v = "true") 82 | 83 | | ETest (nm, vl) -> 84 | let v = 85 | var_get nm 86 | in 87 | (v = vl) 88 | in 89 | eval' t 90 | 91 | 92 | let choose ?printer ?name var_get lst = 93 | let rec choose_aux = 94 | function 95 | | (cond, vl) :: tl -> 96 | if eval var_get cond then 97 | vl 98 | else 99 | choose_aux tl 100 | | [] -> 101 | let str_lst = 102 | if lst = [] then 103 | s_ "" 104 | else 105 | String.concat 106 | (s_ ", ") 107 | (List.map 108 | (fun (cond, vl) -> 109 | match printer with 110 | | Some p -> p vl 111 | | None -> s_ "") 112 | lst) 113 | in 114 | match name with 115 | | Some nm -> 116 | failwith 117 | (Printf.sprintf 118 | (f_ "No result for the choice list '%s': %s") 119 | nm str_lst) 120 | | None -> 121 | failwith 122 | (Printf.sprintf 123 | (f_ "No result for a choice list: %s") 124 | str_lst) 125 | in 126 | choose_aux (List.rev lst) 127 | 128 | 129 | end 130 | 131 | 132 | # 132 "myocamlbuild.ml" 133 | module BaseEnvLight = struct 134 | (* # 22 "src/base/BaseEnvLight.ml" *) 135 | 136 | 137 | module MapString = Map.Make(String) 138 | 139 | 140 | type t = string MapString.t 141 | 142 | 143 | let default_filename = 144 | Filename.concat 145 | (Sys.getcwd ()) 146 | "setup.data" 147 | 148 | 149 | let load ?(allow_empty=false) ?(filename=default_filename) () = 150 | if Sys.file_exists filename then 151 | begin 152 | let chn = 153 | open_in_bin filename 154 | in 155 | let st = 156 | Stream.of_channel chn 157 | in 158 | let line = 159 | ref 1 160 | in 161 | let st_line = 162 | Stream.from 163 | (fun _ -> 164 | try 165 | match Stream.next st with 166 | | '\n' -> incr line; Some '\n' 167 | | c -> Some c 168 | with Stream.Failure -> None) 169 | 
in 170 | let lexer = 171 | Genlex.make_lexer ["="] st_line 172 | in 173 | let rec read_file mp = 174 | match Stream.npeek 3 lexer with 175 | | [Genlex.Ident nm; Genlex.Kwd "="; Genlex.String value] -> 176 | Stream.junk lexer; 177 | Stream.junk lexer; 178 | Stream.junk lexer; 179 | read_file (MapString.add nm value mp) 180 | | [] -> 181 | mp 182 | | _ -> 183 | failwith 184 | (Printf.sprintf 185 | "Malformed data file '%s' line %d" 186 | filename !line) 187 | in 188 | let mp = 189 | read_file MapString.empty 190 | in 191 | close_in chn; 192 | mp 193 | end 194 | else if allow_empty then 195 | begin 196 | MapString.empty 197 | end 198 | else 199 | begin 200 | failwith 201 | (Printf.sprintf 202 | "Unable to load environment, the file '%s' doesn't exist." 203 | filename) 204 | end 205 | 206 | 207 | let rec var_expand str env = 208 | let buff = 209 | Buffer.create ((String.length str) * 2) 210 | in 211 | Buffer.add_substitute 212 | buff 213 | (fun var -> 214 | try 215 | var_expand (MapString.find var env) env 216 | with Not_found -> 217 | failwith 218 | (Printf.sprintf 219 | "No variable %s defined when trying to expand %S." 220 | var 221 | str)) 222 | str; 223 | Buffer.contents buff 224 | 225 | 226 | let var_get name env = 227 | var_expand (MapString.find name env) env 228 | 229 | 230 | let var_choose lst env = 231 | OASISExpr.choose 232 | (fun nm -> var_get nm env) 233 | lst 234 | end 235 | 236 | 237 | # 237 "myocamlbuild.ml" 238 | module MyOCamlbuildFindlib = struct 239 | (* # 22 "src/plugins/ocamlbuild/MyOCamlbuildFindlib.ml" *) 240 | 241 | 242 | (** OCamlbuild extension, copied from 243 | * http://brion.inria.fr/gallium/index.php/Using_ocamlfind_with_ocamlbuild 244 | * by N. 
Pouillard and others 245 | * 246 | * Updated on 2009/02/28 247 | * 248 | * Modified by Sylvain Le Gall 249 | *) 250 | open Ocamlbuild_plugin 251 | 252 | type conf = 253 | { no_automatic_syntax: bool; 254 | } 255 | 256 | (* these functions are not really officially exported *) 257 | let run_and_read = 258 | Ocamlbuild_pack.My_unix.run_and_read 259 | 260 | 261 | let blank_sep_strings = 262 | Ocamlbuild_pack.Lexers.blank_sep_strings 263 | 264 | 265 | let exec_from_conf exec = 266 | let exec = 267 | let env_filename = Pathname.basename BaseEnvLight.default_filename in 268 | let env = BaseEnvLight.load ~filename:env_filename ~allow_empty:true () in 269 | try 270 | BaseEnvLight.var_get exec env 271 | with Not_found -> 272 | Printf.eprintf "W: Cannot get variable %s\n" exec; 273 | exec 274 | in 275 | let fix_win32 str = 276 | if Sys.os_type = "Win32" then begin 277 | let buff = Buffer.create (String.length str) in 278 | (* Adapt for windowsi, ocamlbuild + win32 has a hard time to handle '\\'. 279 | *) 280 | String.iter 281 | (fun c -> Buffer.add_char buff (if c = '\\' then '/' else c)) 282 | str; 283 | Buffer.contents buff 284 | end else begin 285 | str 286 | end 287 | in 288 | fix_win32 exec 289 | 290 | let split s ch = 291 | let buf = Buffer.create 13 in 292 | let x = ref [] in 293 | let flush () = 294 | x := (Buffer.contents buf) :: !x; 295 | Buffer.clear buf 296 | in 297 | String.iter 298 | (fun c -> 299 | if c = ch then 300 | flush () 301 | else 302 | Buffer.add_char buf c) 303 | s; 304 | flush (); 305 | List.rev !x 306 | 307 | 308 | let split_nl s = split s '\n' 309 | 310 | 311 | let before_space s = 312 | try 313 | String.before s (String.index s ' ') 314 | with Not_found -> s 315 | 316 | (* ocamlfind command *) 317 | let ocamlfind x = S[Sh (exec_from_conf "ocamlfind"); x] 318 | 319 | (* This lists all supported packages. 
*) 320 | let find_packages () = 321 | List.map before_space (split_nl & run_and_read (exec_from_conf "ocamlfind" ^ " list")) 322 | 323 | 324 | (* Mock to list available syntaxes. *) 325 | let find_syntaxes () = ["camlp4o"; "camlp4r"] 326 | 327 | 328 | let well_known_syntax = [ 329 | "camlp4.quotations.o"; 330 | "camlp4.quotations.r"; 331 | "camlp4.exceptiontracer"; 332 | "camlp4.extend"; 333 | "camlp4.foldgenerator"; 334 | "camlp4.listcomprehension"; 335 | "camlp4.locationstripper"; 336 | "camlp4.macro"; 337 | "camlp4.mapgenerator"; 338 | "camlp4.metagenerator"; 339 | "camlp4.profiler"; 340 | "camlp4.tracer" 341 | ] 342 | 343 | 344 | let dispatch conf = 345 | function 346 | | After_options -> 347 | (* By using Before_options one let command line options have an higher 348 | * priority on the contrary using After_options will guarantee to have 349 | * the higher priority override default commands by ocamlfind ones *) 350 | Options.ocamlc := ocamlfind & A"ocamlc"; 351 | Options.ocamlopt := ocamlfind & A"ocamlopt"; 352 | Options.ocamldep := ocamlfind & A"ocamldep"; 353 | Options.ocamldoc := ocamlfind & A"ocamldoc"; 354 | Options.ocamlmktop := ocamlfind & A"ocamlmktop"; 355 | Options.ocamlmklib := ocamlfind & A"ocamlmklib" 356 | 357 | | After_rules -> 358 | 359 | (* When one link an OCaml library/binary/package, one should use 360 | * -linkpkg *) 361 | flag ["ocaml"; "link"; "program"] & A"-linkpkg"; 362 | 363 | if not (conf.no_automatic_syntax) then begin 364 | (* For each ocamlfind package one inject the -package option when 365 | * compiling, computing dependencies, generating documentation and 366 | * linking. *) 367 | List.iter 368 | begin fun pkg -> 369 | let base_args = [A"-package"; A pkg] in 370 | (* TODO: consider how to really choose camlp4o or camlp4r. *) 371 | let syn_args = [A"-syntax"; A "camlp4o"] in 372 | let (args, pargs) = 373 | (* Heuristic to identify syntax extensions: whether they end in 374 | ".syntax"; some might not. 
375 | *) 376 | if Filename.check_suffix pkg "syntax" || 377 | List.mem pkg well_known_syntax then 378 | (syn_args @ base_args, syn_args) 379 | else 380 | (base_args, []) 381 | in 382 | flag ["ocaml"; "compile"; "pkg_"^pkg] & S args; 383 | flag ["ocaml"; "ocamldep"; "pkg_"^pkg] & S args; 384 | flag ["ocaml"; "doc"; "pkg_"^pkg] & S args; 385 | flag ["ocaml"; "link"; "pkg_"^pkg] & S base_args; 386 | flag ["ocaml"; "infer_interface"; "pkg_"^pkg] & S args; 387 | 388 | (* TODO: Check if this is allowed for OCaml < 3.12.1 *) 389 | flag ["ocaml"; "compile"; "package("^pkg^")"] & S pargs; 390 | flag ["ocaml"; "ocamldep"; "package("^pkg^")"] & S pargs; 391 | flag ["ocaml"; "doc"; "package("^pkg^")"] & S pargs; 392 | flag ["ocaml"; "infer_interface"; "package("^pkg^")"] & S pargs; 393 | end 394 | (find_packages ()); 395 | end; 396 | 397 | (* Like -package but for extensions syntax. Morover -syntax is useless 398 | * when linking. *) 399 | List.iter begin fun syntax -> 400 | flag ["ocaml"; "compile"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; 401 | flag ["ocaml"; "ocamldep"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; 402 | flag ["ocaml"; "doc"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; 403 | flag ["ocaml"; "infer_interface"; "syntax_"^syntax] & 404 | S[A"-syntax"; A syntax]; 405 | end (find_syntaxes ()); 406 | 407 | (* The default "thread" tag is not compatible with ocamlfind. 408 | * Indeed, the default rules add the "threads.cma" or "threads.cmxa" 409 | * options when using this tag. When using the "-linkpkg" option with 410 | * ocamlfind, this module will then be added twice on the command line. 411 | * 412 | * To solve this, one approach is to add the "-thread" option when using 413 | * the "threads" package using the previous plugin. 
414 | *) 415 | flag ["ocaml"; "pkg_threads"; "compile"] (S[A "-thread"]); 416 | flag ["ocaml"; "pkg_threads"; "doc"] (S[A "-I"; A "+threads"]); 417 | flag ["ocaml"; "pkg_threads"; "link"] (S[A "-thread"]); 418 | flag ["ocaml"; "pkg_threads"; "infer_interface"] (S[A "-thread"]); 419 | flag ["ocaml"; "package(threads)"; "compile"] (S[A "-thread"]); 420 | flag ["ocaml"; "package(threads)"; "doc"] (S[A "-I"; A "+threads"]); 421 | flag ["ocaml"; "package(threads)"; "link"] (S[A "-thread"]); 422 | flag ["ocaml"; "package(threads)"; "infer_interface"] (S[A "-thread"]); 423 | 424 | | _ -> 425 | () 426 | end 427 | 428 | module MyOCamlbuildBase = struct 429 | (* # 22 "src/plugins/ocamlbuild/MyOCamlbuildBase.ml" *) 430 | 431 | 432 | (** Base functions for writing myocamlbuild.ml 433 | @author Sylvain Le Gall 434 | *) 435 | 436 | 437 | 438 | 439 | 440 | open Ocamlbuild_plugin 441 | module OC = Ocamlbuild_pack.Ocaml_compiler 442 | 443 | 444 | type dir = string 445 | type file = string 446 | type name = string 447 | type tag = string 448 | 449 | 450 | (* # 62 "src/plugins/ocamlbuild/MyOCamlbuildBase.ml" *) 451 | 452 | 453 | type t = 454 | { 455 | lib_ocaml: (name * dir list * string list) list; 456 | lib_c: (name * dir * file list) list; 457 | flags: (tag list * (spec OASISExpr.choices)) list; 458 | (* Replace the 'dir: include' from _tags by a precise interdepends in 459 | * directory. 
460 | *) 461 | includes: (dir * dir list) list; 462 | } 463 | 464 | 465 | let env_filename = 466 | Pathname.basename 467 | BaseEnvLight.default_filename 468 | 469 | 470 | let dispatch_combine lst = 471 | fun e -> 472 | List.iter 473 | (fun dispatch -> dispatch e) 474 | lst 475 | 476 | 477 | let tag_libstubs nm = 478 | "use_lib"^nm^"_stubs" 479 | 480 | 481 | let nm_libstubs nm = 482 | nm^"_stubs" 483 | 484 | 485 | let dispatch t e = 486 | let env = 487 | BaseEnvLight.load 488 | ~filename:env_filename 489 | ~allow_empty:true 490 | () 491 | in 492 | match e with 493 | | Before_options -> 494 | let no_trailing_dot s = 495 | if String.length s >= 1 && s.[0] = '.' then 496 | String.sub s 1 ((String.length s) - 1) 497 | else 498 | s 499 | in 500 | List.iter 501 | (fun (opt, var) -> 502 | try 503 | opt := no_trailing_dot (BaseEnvLight.var_get var env) 504 | with Not_found -> 505 | Printf.eprintf "W: Cannot get variable %s\n" var) 506 | [ 507 | Options.ext_obj, "ext_obj"; 508 | Options.ext_lib, "ext_lib"; 509 | Options.ext_dll, "ext_dll"; 510 | ] 511 | 512 | | After_rules -> 513 | (* Declare OCaml libraries *) 514 | List.iter 515 | (function 516 | | nm, [], intf_modules -> 517 | ocaml_lib nm; 518 | let cmis = 519 | List.map (fun m -> (String.uncapitalize m) ^ ".cmi") 520 | intf_modules in 521 | dep ["ocaml"; "link"; "library"; "file:"^nm^".cma"] cmis 522 | | nm, dir :: tl, intf_modules -> 523 | ocaml_lib ~dir:dir (dir^"/"^nm); 524 | List.iter 525 | (fun dir -> 526 | List.iter 527 | (fun str -> 528 | flag ["ocaml"; "use_"^nm; str] (S[A"-I"; P dir])) 529 | ["compile"; "infer_interface"; "doc"]) 530 | tl; 531 | let cmis = 532 | List.map (fun m -> dir^"/"^(String.uncapitalize m)^".cmi") 533 | intf_modules in 534 | dep ["ocaml"; "link"; "library"; "file:"^dir^"/"^nm^".cma"] 535 | cmis) 536 | t.lib_ocaml; 537 | 538 | (* Declare directories dependencies, replace "include" in _tags. 
*) 539 | List.iter 540 | (fun (dir, include_dirs) -> 541 | Pathname.define_context dir include_dirs) 542 | t.includes; 543 | 544 | (* Declare C libraries *) 545 | List.iter 546 | (fun (lib, dir, headers) -> 547 | (* Handle C part of library *) 548 | flag ["link"; "library"; "ocaml"; "byte"; tag_libstubs lib] 549 | (S[A"-dllib"; A("-l"^(nm_libstubs lib)); A"-cclib"; 550 | A("-l"^(nm_libstubs lib))]); 551 | 552 | flag ["link"; "library"; "ocaml"; "native"; tag_libstubs lib] 553 | (S[A"-cclib"; A("-l"^(nm_libstubs lib))]); 554 | 555 | flag ["link"; "program"; "ocaml"; "byte"; tag_libstubs lib] 556 | (S[A"-dllib"; A("dll"^(nm_libstubs lib))]); 557 | 558 | (* When ocaml link something that use the C library, then one 559 | need that file to be up to date. 560 | This holds both for programs and for libraries. 561 | *) 562 | dep ["link"; "ocaml"; tag_libstubs lib] 563 | [dir/"lib"^(nm_libstubs lib)^"."^(!Options.ext_lib)]; 564 | 565 | dep ["compile"; "ocaml"; tag_libstubs lib] 566 | [dir/"lib"^(nm_libstubs lib)^"."^(!Options.ext_lib)]; 567 | 568 | (* TODO: be more specific about what depends on headers *) 569 | (* Depends on .h files *) 570 | dep ["compile"; "c"] 571 | headers; 572 | 573 | (* Setup search path for lib *) 574 | flag ["link"; "ocaml"; "use_"^lib] 575 | (S[A"-I"; P(dir)]); 576 | ) 577 | t.lib_c; 578 | 579 | (* Add flags *) 580 | List.iter 581 | (fun (tags, cond_specs) -> 582 | let spec = BaseEnvLight.var_choose cond_specs env in 583 | let rec eval_specs = 584 | function 585 | | S lst -> S (List.map eval_specs lst) 586 | | A str -> A (BaseEnvLight.var_expand str env) 587 | | spec -> spec 588 | in 589 | flag tags & (eval_specs spec)) 590 | t.flags 591 | | _ -> 592 | () 593 | 594 | 595 | let dispatch_default conf t = 596 | dispatch_combine 597 | [ 598 | dispatch t; 599 | MyOCamlbuildFindlib.dispatch conf; 600 | ] 601 | 602 | 603 | end 604 | 605 | 606 | # 606 "myocamlbuild.ml" 607 | open Ocamlbuild_plugin;; 608 | let package_default = 609 | { 610 | 
MyOCamlbuildBase.lib_ocaml = [("pareto", ["lib"], [])]; 611 | lib_c = []; 612 | flags = 613 | [ 614 | (["oasis_library_pareto_byte"; "ocaml"; "link"; "byte"], 615 | [ 616 | (OASISExpr.EBool true, S []); 617 | (OASISExpr.EFlag "strict", 618 | S [A "-w"; A "@a"; A "-warn-error"; A "-a"]) 619 | ]); 620 | (["oasis_library_pareto_native"; "ocaml"; "link"; "native"], 621 | [ 622 | (OASISExpr.EBool true, S []); 623 | (OASISExpr.EFlag "strict", 624 | S [A "-w"; A "@a"; A "-warn-error"; A "-a"]) 625 | ]); 626 | (["oasis_library_pareto_byte"; "ocaml"; "ocamldep"; "byte"], 627 | [ 628 | (OASISExpr.EBool true, S []); 629 | (OASISExpr.EFlag "strict", 630 | S [A "-w"; A "@a"; A "-warn-error"; A "-a"]) 631 | ]); 632 | (["oasis_library_pareto_native"; "ocaml"; "ocamldep"; "native"], 633 | [ 634 | (OASISExpr.EBool true, S []); 635 | (OASISExpr.EFlag "strict", 636 | S [A "-w"; A "@a"; A "-warn-error"; A "-a"]) 637 | ]); 638 | (["oasis_library_pareto_byte"; "ocaml"; "compile"; "byte"], 639 | [ 640 | (OASISExpr.EBool true, S []); 641 | (OASISExpr.EFlag "strict", 642 | S [A "-w"; A "@a"; A "-warn-error"; A "-a"]) 643 | ]); 644 | (["oasis_library_pareto_native"; "ocaml"; "compile"; "native"], 645 | [ 646 | (OASISExpr.EBool true, S []); 647 | (OASISExpr.EFlag "strict", 648 | S [A "-w"; A "@a"; A "-warn-error"; A "-a"]) 649 | ]); 650 | (["oasis_executable_base_ex_byte"; "ocaml"; "link"; "byte"], 651 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 652 | (["oasis_executable_base_ex_native"; "ocaml"; "link"; "native"], 653 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 654 | (["oasis_executable_base_ex_byte"; "ocaml"; "ocamldep"; "byte"], 655 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 656 | (["oasis_executable_base_ex_native"; "ocaml"; "ocamldep"; "native"], 657 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 658 | (["oasis_executable_base_ex_byte"; "ocaml"; "compile"; "byte"], 659 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 660 | (["oasis_executable_base_ex_native"; "ocaml"; 
"compile"; "native"], 661 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 662 | (["oasis_executable_distributions_ex_byte"; "ocaml"; "link"; "byte" 663 | ], 664 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 665 | ([ 666 | "oasis_executable_distributions_ex_native"; 667 | "ocaml"; 668 | "link"; 669 | "native" 670 | ], 671 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 672 | ([ 673 | "oasis_executable_distributions_ex_byte"; 674 | "ocaml"; 675 | "ocamldep"; 676 | "byte" 677 | ], 678 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 679 | ([ 680 | "oasis_executable_distributions_ex_native"; 681 | "ocaml"; 682 | "ocamldep"; 683 | "native" 684 | ], 685 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 686 | ([ 687 | "oasis_executable_distributions_ex_byte"; 688 | "ocaml"; 689 | "compile"; 690 | "byte" 691 | ], 692 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 693 | ([ 694 | "oasis_executable_distributions_ex_native"; 695 | "ocaml"; 696 | "compile"; 697 | "native" 698 | ], 699 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 700 | (["oasis_executable_tests_ex_byte"; "ocaml"; "link"; "byte"], 701 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 702 | (["oasis_executable_tests_ex_native"; "ocaml"; "link"; "native"], 703 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 704 | (["oasis_executable_tests_ex_byte"; "ocaml"; "ocamldep"; "byte"], 705 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 706 | (["oasis_executable_tests_ex_native"; "ocaml"; "ocamldep"; "native" 707 | ], 708 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 709 | (["oasis_executable_tests_ex_byte"; "ocaml"; "compile"; "byte"], 710 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 711 | (["oasis_executable_tests_ex_native"; "ocaml"; "compile"; "native"], 712 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 713 | (["oasis_executable_sample_ex_byte"; "ocaml"; "link"; "byte"], 714 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 715 | (["oasis_executable_sample_ex_native"; "ocaml"; "link"; "native"], 716 
| [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 717 | (["oasis_executable_sample_ex_byte"; "ocaml"; "ocamldep"; "byte"], 718 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 719 | ([ 720 | "oasis_executable_sample_ex_native"; 721 | "ocaml"; 722 | "ocamldep"; 723 | "native" 724 | ], 725 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 726 | (["oasis_executable_sample_ex_byte"; "ocaml"; "compile"; "byte"], 727 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 728 | (["oasis_executable_sample_ex_native"; "ocaml"; "compile"; "native" 729 | ], 730 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 731 | (["oasis_executable_resampling_ex_byte"; "ocaml"; "link"; "byte"], 732 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 733 | ([ 734 | "oasis_executable_resampling_ex_native"; 735 | "ocaml"; 736 | "link"; 737 | "native" 738 | ], 739 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 740 | ([ 741 | "oasis_executable_resampling_ex_byte"; 742 | "ocaml"; 743 | "ocamldep"; 744 | "byte" 745 | ], 746 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 747 | ([ 748 | "oasis_executable_resampling_ex_native"; 749 | "ocaml"; 750 | "ocamldep"; 751 | "native" 752 | ], 753 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 754 | (["oasis_executable_resampling_ex_byte"; "ocaml"; "compile"; "byte" 755 | ], 756 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 757 | ([ 758 | "oasis_executable_resampling_ex_native"; 759 | "ocaml"; 760 | "compile"; 761 | "native" 762 | ], 763 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]) 764 | ]; 765 | includes = [("lib_test", ["lib"]); ("examples", ["lib"])] 766 | } 767 | ;; 768 | 769 | let conf = {MyOCamlbuildFindlib.no_automatic_syntax = false} 770 | 771 | let dispatch_default = MyOCamlbuildBase.dispatch_default conf package_default;; 772 | 773 | # 774 "myocamlbuild.ml" 774 | (* OASIS_STOP *) 775 | Ocamlbuild_plugin.dispatch dispatch_default;; 776 | -------------------------------------------------------------------------------- /opam: 
-------------------------------------------------------------------------------- 1 | opam-version: "1" 2 | maintainer: "superbobry@gmail.com" 3 | build: [ 4 | ["./configure" "--prefix" "%{prefix}%"] 5 | [make] 6 | [make "install"] 7 | ] 8 | remove: [ 9 | ["ocamlfind" "remove" "pareto"] 10 | ] 11 | depends: ["ocamlfind" "gsl" {>= "1.13.0"}] 12 | ocaml-version: [>= "4.00.1"] 13 | 14 | homepage: "https://github.com/superbobry/pareto" 15 | license: "MIT" 16 | authors: ["Sergei Lebedev"] 17 | -------------------------------------------------------------------------------- /setup.ml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/superbobry/pareto/8b3b27bce7b7df5d9713d16ed40a844861aa368e/setup.ml --------------------------------------------------------------------------------