├── .gitignore ├── CHANGES ├── LICENSE ├── Makefile ├── README.md ├── _oasis ├── _tags ├── configure ├── examples ├── base_ex.ml ├── common.ml ├── distributions_ex.ml ├── resampling_ex.ml ├── sample_ex.ml └── tests_ex.ml ├── lib ├── API.odocl ├── META ├── base.ml ├── base.mli ├── distributions.ml ├── distributions.mli ├── internal.ml ├── pareto.mldylib ├── pareto.mllib ├── pareto.mlpack ├── resampling.ml ├── resampling.mli ├── sample.ml ├── sample.mli ├── tests.ml └── tests.mli ├── lib_test ├── common.ml ├── distributions_test.ml ├── sample_test.ml ├── test.ml ├── test_runner.ml └── tests_test.ml ├── myocamlbuild.ml ├── opam └── setup.ml /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | setup.log 3 | setup.data 4 | *.native 5 | *.byte 6 | *.docdir 7 | *.ba* 8 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | pareto Changelog 2 | ================ 3 | 4 | Here you can see the full list of changes between each pareto release. 5 | 6 | Version 0.3 7 | ----------- 8 | 9 | Released on March 29th, 2016 10 | 11 | - Added MLE for 'Gamma' distribution. 12 | - Added log-probability and log-density to all distributions. 13 | - Added a method for sampling a single value from a distribution, see 14 | 'BaseDistribution.random'. 15 | - Changed the observation label in 'DiscreteDistribution' from ~n to ~k. 16 | - Bumped minimum required ocaml-gsl version to 1.14.0 and dropped custom 17 | 'Combi' stubs. 18 | 19 | Version 0.2 20 | ----------- 21 | 22 | Released on July 13th, 2013 23 | 24 | - Added summary statistics, see 'Sample.Summary' module. Thanks to Nicholas 25 | Lucaroni! 26 | - Added MLE for distributions which have a closed-form MLE. Thanks to Nicholas 27 | Lucaroni, once again! For distributions, which don't have a closed-form 28 | solution, we currently provide method of moments estimates. 
Exceptions are: 29 | 'Cauchy' and 'Hypergeometric' distributions. 30 | - Added Pearson product-moment correlation and autocorrelation and Spearman 31 | rank correlation. 32 | - Added Kolmogorov-Smirnov test for goodness of fit and two-sample test. 33 | - Added function, which adjusts P-values for multiple comparisons. Currently, 34 | two procedures are supported: Benjamini-Hochberg, which controls for FDR and 35 | Holm-Bonferroni, which controls FWER. 36 | - Abstracted RV type into '*Distribution.elt'. 37 | - Added GSL wrappers for computing sample skewness and kurtosis. 38 | - Added more distribution features: skewness and kurtosis, see 39 | 'Distributions.Features' signature. 40 | - Added 'LogNormal', 'Bernoulli', 'Logistic' and 'Categorical' distributions. 41 | - Added a basic test suite against R and SciPy, thanks to Francois Berenger 42 | for the reminder. 43 | 44 | Version 0.1 45 | ----------- 46 | 47 | Initial release, released on June 11th, 2013 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013 Sergei Lebedev 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 
12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # OASIS_START 2 | # DO NOT EDIT (digest: a3c674b4239234cbbe53afe090018954) 3 | 4 | SETUP = ocaml setup.ml 5 | 6 | build: setup.data 7 | $(SETUP) -build $(BUILDFLAGS) 8 | 9 | doc: setup.data build 10 | $(SETUP) -doc $(DOCFLAGS) 11 | 12 | test: setup.data build 13 | $(SETUP) -test $(TESTFLAGS) 14 | 15 | all: 16 | $(SETUP) -all $(ALLFLAGS) 17 | 18 | install: setup.data 19 | $(SETUP) -install $(INSTALLFLAGS) 20 | 21 | uninstall: setup.data 22 | $(SETUP) -uninstall $(UNINSTALLFLAGS) 23 | 24 | reinstall: setup.data 25 | $(SETUP) -reinstall $(REINSTALLFLAGS) 26 | 27 | clean: 28 | $(SETUP) -clean $(CLEANFLAGS) 29 | 30 | distclean: 31 | $(SETUP) -distclean $(DISTCLEANFLAGS) 32 | 33 | setup.data: 34 | $(SETUP) -configure $(CONFIGUREFLAGS) 35 | 36 | configure: 37 | $(SETUP) -configure $(CONFIGUREFLAGS) 38 | 39 | .PHONY: build doc test all install uninstall reinstall clean distclean configure 40 | 41 | # OASIS_STOP 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![pareto](http://upload.wikimedia.org/wikipedia/commons/5/55/Vilfredo_F._D._Pareto.jpg) 2 | 3 | `pareto` is an OCaml statistics library, based on [GSL] [gsl], which provides: 4 | 5 | * Common statistical tests for 
significant differences between samples. 6 | * Uniform interface for common discrete and continuous probability distributions. 7 | * Sample statistics, quantile estimation, kernel density estimation. 8 | * Resampling methods: jackknife, BCa bootstrap. 9 | 10 | **Note**: before using `pareto` make sure you understand some of the subtleties 11 | in OCaml GSL bindings. See section **ERROR HANDLING** in GSL [README] [README]. 12 | 13 | [gsl]: http://www.gnu.org/software/gsl 14 | [README]: https://bitbucket.org/mmottl/gsl-ocaml 15 | 16 | Installation 17 | ------------ 18 | 19 | Make sure you have `gsl-ocaml`, or install it with [OPAM](http://opam.ocamlpro.com): 20 | 21 | ```bash 22 | $ opam install gsl 23 | ``` 24 | 25 | Then go with the usual OASIS routines: 26 | 27 | ```bash 28 | $ ./configure 29 | $ make # And you're done! 30 | ``` 31 | 32 | ### Examples _(optional)_ 33 | 34 | To build examples: 35 | 36 | ```bash 37 | $ ./configure --enable-examples 38 | $ make 39 | ``` 40 | 41 | Here's a simple t-test: 42 | 43 | ```ocaml 44 | open Pareto 45 | 46 | let open Distributions.Normal in 47 | let v = sample ~size:10 standard in 48 | let open Tests in 49 | let { test_statistic = t; test_pvalue } = 50 | T.one_sample v ~mean:0. 
~alternative:TwoSided () 51 | in begin 52 | printf "One-sample T-test for true mean = 0.0\n"; 53 | printf "t = %f, P-value: %f\n" t test_pvalue; 54 | end 55 | ``` 56 | 57 | ### Documentation _(optional)_ 58 | 59 | To build API documentation: 60 | 61 | ```bash 62 | $ make doc 63 | ``` 64 | 65 | ### Tests _(optional)_ [![Build Status][status-image]][status] 66 | 67 | To build and run tests: 68 | 69 | ```bash 70 | $ ./configure --enable-tests 71 | $ make test 72 | ``` 73 | 74 | [status]: https://drone.io/github.com/superbobry/pareto/latest 75 | [status-image]: https://drone.io/github.com/superbobry/pareto/status.png 76 | -------------------------------------------------------------------------------- /_oasis: -------------------------------------------------------------------------------- 1 | OASISFormat: 0.4 2 | Name: pareto 3 | Version: 0.3 4 | Synopsis: GSL powered OCaml statistics library 5 | Authors: Sergei Lebedev 6 | Maintainers: Sergei Lebedev 7 | License: MIT 8 | OCamlVersion: >= 4.0.0 9 | FindlibVersion: >= 1.3.1 10 | Homepage: https://github.com/superbobry/pareto 11 | BuildTools: ocamlbuild, ocamldoc 12 | Plugins: META (0.3), DevFiles (0.3) 13 | 14 | Flag strict 15 | Description: Strict compile-time checks 16 | Default: false 17 | 18 | Library pareto 19 | Path: lib 20 | Pack: true 21 | Modules: Base 22 | , Distributions 23 | , Resampling 24 | , Sample 25 | , Tests 26 | InternalModules: Internal 27 | if flag(strict) 28 | NativeOpt: -w @a -warn-error -a 29 | ByteOpt: -w @a -warn-error -a 30 | BuildDepends: gsl (>= 1.14.0) 31 | 32 | # Tests 33 | 34 | Executable test_runner 35 | Path: lib_test 36 | MainIs: test_runner.ml 37 | Build$: flag(tests) 38 | Custom: true 39 | CompiledObject: best 40 | Install: false 41 | BuildDepends: pareto 42 | , oUnit (>= 2.0.0) 43 | 44 | Test lib_test 45 | Run$: flag(tests) 46 | Command: $test_runner 47 | WorkingDirectory: lib_test 48 | 49 | # Examples 50 | 51 | Flag examples 52 | Description: Build examples 53 | Default: false 54 
| 55 | Executable base_ex 56 | Path: examples 57 | Build$: flag(examples) 58 | Install: false 59 | CompiledObject: best 60 | MainIs: base_ex.ml 61 | BuildDepends: gsl, pareto 62 | NativeOpt: -w @a 63 | ByteOpt: -w @a 64 | 65 | Executable distributions_ex 66 | Path: examples 67 | Build$: flag(examples) 68 | Install: false 69 | CompiledObject: best 70 | MainIs: distributions_ex.ml 71 | BuildDepends: gsl, pareto 72 | NativeOpt: -w @a 73 | ByteOpt: -w @a 74 | 75 | Executable tests_ex 76 | Path: examples 77 | Build$: flag(examples) 78 | Install: false 79 | CompiledObject: best 80 | MainIs: tests_ex.ml 81 | BuildDepends: gsl, pareto 82 | NativeOpt: -w @a 83 | ByteOpt: -w @a 84 | 85 | Executable sample_ex 86 | Path: examples 87 | Build$: flag(examples) 88 | Install: false 89 | CompiledObject: best 90 | MainIs: sample_ex.ml 91 | BuildDepends: gsl, pareto 92 | NativeOpt: -w @a 93 | ByteOpt: -w @a 94 | 95 | Executable resampling_ex 96 | Path: examples 97 | Build$: flag(examples) 98 | Install: false 99 | CompiledObject: best 100 | MainIs: resampling_ex.ml 101 | BuildDepends: gsl, pareto 102 | NativeOpt: -w @a 103 | ByteOpt: -w @a 104 | 105 | Document API 106 | Title: API reference for Pareto 107 | Type: OCamlbuild (0.3) 108 | InstallDir: $docdir/api 109 | XOCamlbuildPath: lib 110 | XOCamlbuildLibraries: pareto 111 | 112 | SourceRepository github 113 | Type: git 114 | Location: https://github.com/superbobry/pareto 115 | Browser: https://github.com/superbobry/pareto 116 | Tag: $(pkg_version) 117 | -------------------------------------------------------------------------------- /_tags: -------------------------------------------------------------------------------- 1 | # OASIS_START 2 | # DO NOT EDIT (digest: e03994bacd251b5bdf8e243abc994271) 3 | # Ignore VCS directories, you can use the same kind of rule outside 4 | # OASIS_START/STOP if you want to exclude directories that contains 5 | # useless stuff for the build process 6 | true: annot, bin_annot 7 | <**/.svn>: -traverse 8 
| <**/.svn>: not_hygienic 9 | ".bzr": -traverse 10 | ".bzr": not_hygienic 11 | ".hg": -traverse 12 | ".hg": not_hygienic 13 | ".git": -traverse 14 | ".git": not_hygienic 15 | "_darcs": -traverse 16 | "_darcs": not_hygienic 17 | # Library pareto 18 | "lib/pareto.cmxs": use_pareto 19 | "lib/base.cmx": for-pack(Pareto) 20 | "lib/distributions.cmx": for-pack(Pareto) 21 | "lib/resampling.cmx": for-pack(Pareto) 22 | "lib/sample.cmx": for-pack(Pareto) 23 | "lib/tests.cmx": for-pack(Pareto) 24 | "lib/internal.cmx": for-pack(Pareto) 25 | : oasis_library_pareto_byte 26 | : oasis_library_pareto_byte 27 | : oasis_library_pareto_native 28 | : oasis_library_pareto_native 29 | : package(gsl) 30 | # Executable test_runner 31 | : package(gsl) 32 | : package(oUnit) 33 | : use_pareto 34 | : package(gsl) 35 | : package(oUnit) 36 | : use_pareto 37 | : custom 38 | # Executable base_ex 39 | : oasis_executable_base_ex_byte 40 | : oasis_executable_base_ex_byte 41 | : oasis_executable_base_ex_native 42 | : oasis_executable_base_ex_native 43 | : package(gsl) 44 | : use_pareto 45 | # Executable distributions_ex 46 | : oasis_executable_distributions_ex_byte 47 | : oasis_executable_distributions_ex_byte 48 | : oasis_executable_distributions_ex_native 49 | : oasis_executable_distributions_ex_native 50 | : package(gsl) 51 | : use_pareto 52 | # Executable tests_ex 53 | : oasis_executable_tests_ex_byte 54 | : oasis_executable_tests_ex_byte 55 | : oasis_executable_tests_ex_native 56 | : oasis_executable_tests_ex_native 57 | : package(gsl) 58 | : use_pareto 59 | # Executable sample_ex 60 | : oasis_executable_sample_ex_byte 61 | : oasis_executable_sample_ex_byte 62 | : oasis_executable_sample_ex_native 63 | : oasis_executable_sample_ex_native 64 | : package(gsl) 65 | : use_pareto 66 | # Executable resampling_ex 67 | : oasis_executable_resampling_ex_byte 68 | : oasis_executable_resampling_ex_byte 69 | : oasis_executable_resampling_ex_native 70 | : oasis_executable_resampling_ex_native 71 | : 
package(gsl) 72 | : use_pareto 73 | : package(gsl) 74 | : use_pareto 75 | # OASIS_STOP 76 | 77 | : use_toploop 78 | -------------------------------------------------------------------------------- /configure: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # OASIS_START 4 | # DO NOT EDIT (digest: dc86c2ad450f91ca10c931b6045d0499) 5 | set -e 6 | 7 | FST=true 8 | for i in "$@"; do 9 | if $FST; then 10 | set -- 11 | FST=false 12 | fi 13 | 14 | case $i in 15 | --*=*) 16 | ARG=${i%%=*} 17 | VAL=${i##*=} 18 | set -- "$@" "$ARG" "$VAL" 19 | ;; 20 | *) 21 | set -- "$@" "$i" 22 | ;; 23 | esac 24 | done 25 | 26 | ocaml setup.ml -configure "$@" 27 | # OASIS_STOP 28 | -------------------------------------------------------------------------------- /examples/base_ex.ml: -------------------------------------------------------------------------------- 1 | open Pareto 2 | 3 | open Common 4 | 5 | 6 | let sample_shuffle () = 7 | let vs = Array.init 10 (fun i -> float_of_int i) in 8 | let svs = Base.shuffle vs in 9 | let svs_with_replacement = Base.sample ~replace:true ~size:5 vs 10 | and svs_without_replacement = Base.sample ~size:5 vs in begin 11 | print_string "Initial sample : "; 12 | print_float_array vs; 13 | print_string "Shuffled sample: "; 14 | print_float_array svs; 15 | print_endline "5-sample *with* replacement from the shuffled array:"; 16 | print_float_array svs_with_replacement; 17 | print_endline "5-sample *without* replacement from the shuffled array:"; 18 | print_float_array svs_without_replacement; 19 | print_newline () 20 | end 21 | 22 | 23 | let () = begin 24 | sample_shuffle () 25 | end 26 | -------------------------------------------------------------------------------- /examples/common.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | let print_array f vs = 4 | print_char '['; 5 | Array.iteri (fun i v -> 6 | f v; 7 | if i <> Array.length vs - 1 8 | 
then print_string ", ") vs; 9 | print_char ']'; 10 | print_newline () 11 | 12 | let print_float_array = print_array (printf "%.5f") 13 | and print_int_array = print_array (printf "%i") 14 | 15 | let random_array ?(a=0.) ?(b=100.) n = 16 | let vs = Array.make n 0. in 17 | for i = 0 to n - 1 do 18 | vs.(i) <- a +. Random.float b 19 | done; vs 20 | -------------------------------------------------------------------------------- /examples/distributions_ex.ml: -------------------------------------------------------------------------------- 1 | open Pareto.Distributions 2 | 3 | open Common 4 | 5 | 6 | let distribution_mean (type t) 7 | (module F : Features.S with type t = t and type elt = float) 8 | (d : t) () = 9 | Printf.printf "E[X] = %.4f\n" (F.mean d) 10 | 11 | let distribution_quantile (type t) 12 | (module D : ContinuousDistribution with type t = t and type elt = float) 13 | (d : t) () = 14 | Printf.printf "Q(0.5) = %.4f\n" (D.quantile ~p:0.5 d) 15 | 16 | let distribution_sample (type t) 17 | (module D : DiscreteDistribution with type t = t and type elt = int) 18 | (d : t) () = 19 | print_int_array (D.sample ~size:10 d) 20 | 21 | 22 | let () = begin 23 | distribution_mean (module Normal) (normal ~mean:0. ~sd:1.) (); 24 | distribution_quantile (module Beta) (beta ~alpha:1. 
~beta:0.5) (); 25 | distribution_sample (module Poisson) (poisson ~rate:0.5) () 26 | end 27 | -------------------------------------------------------------------------------- /examples/resampling_ex.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | open Pareto 4 | 5 | open Common 6 | 7 | let bootstrap_mean () = 8 | let open Distributions.Normal in 9 | let vs = sample ~size:100 standard in 10 | let open Resampling.Bootstrap in 11 | let { point; upper_bound; lower_bound; confidence_level } = 12 | bca ~estimator:Sample.mean ~n_iter:10000 vs 13 | in begin 14 | print_endline "Sample:"; 15 | print_float_array vs; 16 | print_endline "BCA bootstrapped estimate of sample mean:"; 17 | printf "%.5f %i%% CI %.5f %.5f\n" 18 | point (int_of_float (confidence_level *. 100.)) lower_bound upper_bound; 19 | print_newline () 20 | end 21 | 22 | 23 | let () = begin 24 | bootstrap_mean () 25 | end 26 | -------------------------------------------------------------------------------- /examples/sample_ex.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | open Pareto 4 | 5 | open Common 6 | 7 | let print_histogram (points, counts) = 8 | Array.iteri (fun i count -> 9 | printf "%9.5f " points.(i); 10 | for _i = 0 to int_of_float count do 11 | print_char '*'; 12 | done; 13 | 14 | print_newline (); 15 | ) counts 16 | 17 | and print_density (points, pdf) = 18 | Array.iteri (fun i d -> 19 | let count = int_of_float (d *. 20.) in 20 | printf "%9.5f " points.(i); 21 | for i = 0 to count do 22 | print_char (if i = count then '.' 
else ' '); 23 | done; 24 | 25 | print_newline (); 26 | ) pdf 27 | 28 | 29 | let sample_histogram () = 30 | let open Distributions.Normal in 31 | let vs = sample ~size:100 standard in 32 | let (points, counts) = Sample.histogram ~n_bins:10 vs in begin 33 | print_endline "Normal sample histogram"; 34 | print_float_array counts; 35 | print_histogram (points, counts); 36 | print_newline () 37 | end 38 | 39 | let sample_kde () = 40 | let open Distributions.Normal in 41 | let vs = sample ~size:100 standard in 42 | let (points, pdf) = 43 | let open Sample.KDE in 44 | estimate_pdf ~kernel:Gaussian ~bandwidth:Silverman ~n_points:10 vs 45 | in begin 46 | print_endline "Normal sample (Gaussian) KDE"; 47 | print_float_array pdf; 48 | print_density (points, pdf); 49 | print_newline () 50 | end 51 | 52 | let sample_quantiles () = 53 | let vs = random_array 10 54 | and ps = [|0.; 0.25; 0.75; 1.|] in begin 55 | print_float_array vs; 56 | print_float_array ps; 57 | print_float_array (Sample.quantile ~ps vs); 58 | print_newline () 59 | end 60 | 61 | let sample_iqr () = 62 | let vs = random_array 10 in begin 63 | print_float_array vs; 64 | printf " IQR: %9.5f\n" (Sample.iqr vs); 65 | print_newline () 66 | end 67 | 68 | let sample_ranks () = 69 | let vs = random_array 10 in 70 | let (_t, ranks) = Sample.rank vs in begin 71 | print_endline "Sample ranks"; 72 | print_float_array vs; 73 | print_float_array ranks; 74 | print_newline () 75 | end 76 | 77 | let sample_correlation () = 78 | let vs1 = random_array 10 79 | and vs2 = random_array 10 in begin 80 | print_float_array vs1; 81 | print_float_array vs2; 82 | printf "Pearson product-momentum correlation: %f\n" 83 | (Sample.Correlation.pearson vs1 vs2); 84 | printf "Spearman rank-correlation: %f\n" 85 | (Sample.Correlation.spearman vs1 vs2); 86 | print_newline () 87 | end 88 | 89 | let sample_autocorrelation () = 90 | let vs = random_array 10 in begin 91 | print_float_array vs; 92 | print_endline "Pearson product-momentum 
autocorrelation:"; 93 | print_float_array (Sample.Correlation.Auto.pearson vs); 94 | print_newline () 95 | end 96 | 97 | let sample_summary () = 98 | let vs = random_array 10 in 99 | let open Sample in 100 | let s = Summary.(Array.fold_left add empty vs) in 101 | begin 102 | print_float_array vs; 103 | printf " kurtosis = %f, skewness = %f\n" (kurtosis vs) (skewness vs); 104 | printf "Summary kurtosis = %f, skewness = %f\n" 105 | (Summary.kurtosis s) (Summary.skewness s); 106 | print_newline () 107 | end 108 | 109 | 110 | let () = begin 111 | sample_histogram (); 112 | sample_kde (); 113 | sample_quantiles (); 114 | sample_iqr (); 115 | sample_ranks (); 116 | sample_correlation (); 117 | sample_autocorrelation (); 118 | sample_summary () 119 | end 120 | -------------------------------------------------------------------------------- /examples/tests_ex.ml: -------------------------------------------------------------------------------- 1 | open Printf 2 | 3 | open Pareto 4 | open Pareto.Tests 5 | 6 | open Common 7 | 8 | let t_test_one_sample () = 9 | let open Distributions.Normal in 10 | let v = sample ~size:10 standard in 11 | let { test_statistic = t; test_pvalue } = 12 | T.one_sample v ~mean:0. 
~alternative:TwoSided () 13 | in begin 14 | printf "One-sample T-test for true mean = 0.0\n"; 15 | print_float_array v; 16 | printf "t = %f, P-value: %f\n" t test_pvalue; 17 | print_newline () 18 | end 19 | 20 | let t_test_two_sample_independent () = 21 | let open Distributions.Normal in 22 | let v1 = sample ~size:10 standard in 23 | let v2 = sample ~size:10 standard in 24 | let { test_statistic = t; test_pvalue } = 25 | T.two_sample_independent v1 v2 26 | ~mean:0.1 ~equal_variance:false ~alternative:TwoSided () 27 | in begin 28 | printf "Two-sample T-test for mean difference not equal to 0.1\n"; 29 | print_float_array v1; 30 | print_float_array v2; 31 | printf "t = %f, P-value: %f\n" t test_pvalue; 32 | print_newline () 33 | end 34 | 35 | let t_test_two_sample_paired () = 36 | let open Distributions.Normal in 37 | let v1 = sample ~size:10 standard in 38 | let v2 = Array.mapi (fun i x -> x +. v1.(i)) (sample ~size:10 standard) in 39 | let { test_statistic = t; test_pvalue } = T.two_sample_paired v1 v2 40 | ~mean:0.1 ~alternative:TwoSided () 41 | in begin 42 | printf "Paired two-sample T-test for mean difference not equal to 0.1\n"; 43 | print_float_array v1; 44 | print_float_array v2; 45 | printf "t = %f, P-value: %f\n" t test_pvalue; 46 | print_newline () 47 | end 48 | 49 | let chisq_test_gof () = 50 | let open Distributions.Uniform in 51 | let v = sample ~size:10 (create ~lower:0. ~upper:1.) in 52 | let { test_statistic = chisq; test_pvalue } = 53 | ChiSquared.goodness_of_fit v () 54 | in begin 55 | print_endline "X^2 test for goodness of fit"; 56 | print_float_array v; 57 | printf "X^2 = %f, P-value: %f\n" chisq test_pvalue; 58 | print_newline () 59 | end 60 | 61 | let chisq_test_independence () = 62 | let open Distributions.Uniform in 63 | let d = create ~lower:0. ~upper:1. 
in 64 | let v1 = sample ~size:10 d in 65 | let v2 = sample ~size:10 d in 66 | let { test_statistic = chisq; test_pvalue } = 67 | ChiSquared.independence [|v1; v2|] ~correction:true () 68 | in begin 69 | print_endline "X^2 test for independence with Yates' continuity correction\n"; 70 | print_float_array v1; 71 | print_float_array v2; 72 | printf "X^2 = %f, P-value: %f\n" chisq test_pvalue; 73 | print_newline () 74 | end 75 | 76 | let mann_whitney_wilcoxon () = 77 | let v1 = [|11; 1; -1; 2; 0|] in 78 | let v2 = [|-5; 9; 5; 8; 4|] in 79 | let { test_statistic = u; test_pvalue } = 80 | MannWhitneyU.two_sample_independent v1 v2 81 | ~correction:true ~alternative:TwoSided () 82 | in begin 83 | printf "Two-sample Mann-Whitney U test\n"; 84 | print_int_array v1; 85 | print_int_array v2; 86 | printf "U = %f, P-value: %f\n" u test_pvalue; 87 | print_newline () 88 | end 89 | 90 | let wilcoxon_signed_rank_one_sample () = 91 | let vs = [|11.; 1.; -1.; 2.; 0.|] in 92 | let { test_statistic = w; test_pvalue } = 93 | WilcoxonT.one_sample vs 94 | ~shift:1. ~correction:true ~alternative:Greater () 95 | in begin 96 | printf "Wilcoxon signed rank test with continuity correction\n"; 97 | print_float_array vs; 98 | printf "W = %f, P-value: %f\n" w test_pvalue; 99 | print_newline () 100 | end 101 | 102 | let wilcoxon_signed_rank_paired () = 103 | let v1 = [|11.; 1.; -1.; 2.; 0.|] in 104 | let v2 = [|-5.; 9.; 5.; 8.; 4.|] in 105 | let { test_statistic = w; test_pvalue } = 106 | WilcoxonT.two_sample_paired v1 v2 107 | ~correction:true ~alternative:Less () 108 | in begin 109 | print_endline ("Two-sample paired Wilcoxon signed rank test with " ^ 110 | "continuity correction"); 111 | print_float_array v1; 112 | print_float_array v2; 113 | printf "W = %f, P-value: %f\n" w test_pvalue; 114 | print_newline () 115 | end 116 | 117 | let sign_one_sample () = 118 | let vs = [|11.; 1.; -1.; 2.; 0.|] in 119 | let { test_statistic = pi_plus; test_pvalue } = 120 | Sign.one_sample vs ~shift:1. 
~alternative:TwoSided () 121 | in begin 122 | printf "One-sample Sign test\n"; 123 | print_float_array vs; 124 | printf "π+ = %f, P-value: %f\n" pi_plus test_pvalue; 125 | print_newline () 126 | end 127 | 128 | let sign_paired () = 129 | let v1 = [|11.; 1.; -1.; 2.; 0.|] in 130 | let v2 = [|-5.; 9.; 5.; 8.; 4.|] in 131 | let { test_statistic = pi_plus; test_pvalue } = 132 | Sign.two_sample_paired v1 v2 ~alternative:TwoSided () 133 | in begin 134 | printf "Two-sample Sign test\n"; 135 | print_float_array v1; 136 | print_float_array v2; 137 | printf "π+ = %f, P-value: %f\n" pi_plus test_pvalue; 138 | print_newline () 139 | end 140 | 141 | let ks_gof () = 142 | let open Distributions.Normal in 143 | let v = sample ~size:10 standard in 144 | let { test_statistic = d; test_pvalue } = 145 | KolmogorovSmirnov.goodness_of_fit v 146 | ~cumulative_probability:(fun x -> cumulative_probability standard ~x) 147 | ~alternative:TwoSided () 148 | in begin 149 | print_endline "One-sample Kolmogorov-Smirnov test for goodness of fit"; 150 | print_float_array v; 151 | printf "D = %f, P-value: %f\n" d test_pvalue; 152 | print_newline () 153 | end 154 | 155 | let ks_two_sample () = 156 | let open Distributions.Normal in 157 | let v1 = sample ~size:10 standard in 158 | let v2 = sample ~size:10 standard in 159 | let { test_statistic = d; test_pvalue } = 160 | KolmogorovSmirnov.two_sample v1 v2 ~alternative:TwoSided () 161 | in begin 162 | print_endline "Two-sample Kolmogorov-Smirnov test"; 163 | print_float_array v1; 164 | print_float_array v2; 165 | printf "D = %f, P-value: %f\n" d test_pvalue; 166 | print_newline () 167 | end 168 | 169 | 170 | let adjust_bh () = 171 | let open Distributions.Beta in 172 | let pvalues = sample ~size:10 (create ~alpha:0.5 ~beta:0.5) in 173 | let adjusted_pvalues = Multiple.(adjust pvalues BenjaminiHochberg) in 174 | begin 175 | printf "Benjamini-Hochberg P-value adjustment\n"; 176 | print_float_array pvalues; 177 | print_float_array adjusted_pvalues; 178 | 
print_newline () 179 | end 180 | 181 | let adjust_hb () = 182 | let open Distributions.Beta in 183 | let pvalues = sample ~size:10 (create ~alpha:0.5 ~beta:0.5) in 184 | let adjusted_pvalues = Multiple.(adjust pvalues HolmBonferroni) in 185 | begin 186 | printf "Holm-Bonferroni P-value adjustment\n"; 187 | print_float_array pvalues; 188 | print_float_array adjusted_pvalues; 189 | print_newline () 190 | end 191 | 192 | 193 | let () = begin 194 | t_test_one_sample (); 195 | t_test_two_sample_independent (); 196 | t_test_two_sample_paired (); 197 | chisq_test_gof (); 198 | chisq_test_independence (); 199 | mann_whitney_wilcoxon (); 200 | wilcoxon_signed_rank_one_sample (); 201 | wilcoxon_signed_rank_paired (); 202 | sign_one_sample (); 203 | sign_paired (); 204 | ks_gof (); 205 | ks_two_sample (); 206 | 207 | adjust_bh (); 208 | adjust_hb () 209 | end 210 | -------------------------------------------------------------------------------- /lib/API.odocl: -------------------------------------------------------------------------------- 1 | # OASIS_START 2 | # DO NOT EDIT (digest: e7dcb58f1d0a662ba5a939df4901999e) 3 | Base 4 | Distributions 5 | Resampling 6 | Sample 7 | Tests 8 | # OASIS_STOP 9 | -------------------------------------------------------------------------------- /lib/META: -------------------------------------------------------------------------------- 1 | # OASIS_START 2 | # DO NOT EDIT (digest: e6a492f67e9c0f9953d83800a3a8effd) 3 | version = "0.3" 4 | description = "GSL powered OCaml statistics library" 5 | requires = "gsl" 6 | archive(byte) = "pareto.cma" 7 | archive(byte, plugin) = "pareto.cma" 8 | archive(native) = "pareto.cmxa" 9 | archive(native, plugin) = "pareto.cmxs" 10 | exists_if = "pareto.cma" 11 | # OASIS_STOP 12 | 13 | -------------------------------------------------------------------------------- /lib/base.ml: -------------------------------------------------------------------------------- 1 | module Randist = Gsl.Randist 2 | 3 | open 
Internal 4 | 5 | 6 | let range ?(a=0) ~b = 7 | if b <= a 8 | then [||] 9 | else 10 | let vs = Array.make (b - a) 0 in begin 11 | for i = a to b - 1 do 12 | Array.unsafe_set vs (i - a) i 13 | done; vs 14 | end 15 | 16 | 17 | let cumulative ~f = function 18 | | [||] -> [||] 19 | | xs -> 20 | let n = Array.length xs in 21 | let acc = Array.make n (Array.unsafe_get xs 0) in 22 | for i = 1 to n - 1 do 23 | Array.unsafe_set acc i 24 | (f (Array.unsafe_get acc (i - 1)) (Array.unsafe_get xs i)) 25 | done; acc 26 | 27 | 28 | 29 | let reorder is ~src ~dst = 30 | let n = Array.length src in 31 | for i = 0 to n - 1 do 32 | let j = Array.unsafe_get is i in 33 | Array.unsafe_set dst i (Array.unsafe_get src j) 34 | done 35 | 36 | 37 | let search_sorted ~cmp vs v = 38 | let rec loop l r = 39 | (* We're looking for [v] in a semiclosed interval [l, r). 40 | Invariants: mid < r, 41 | 0 < l <= r. *) 42 | if l >= r 43 | then None 44 | else 45 | let mid = (r - l) / 2 + l in 46 | let res = cmp v (Array.unsafe_get vs mid) in 47 | if res = 0 48 | then Some mid 49 | else if res < 0 50 | then loop l mid 51 | else loop (mid + 1) r 52 | in loop 0 (Array.length vs) 53 | 54 | 55 | let shuffle ?(rng=default_rng) vs = 56 | let svs = Array.copy vs in begin 57 | Randist.shuffle rng svs; 58 | svs 59 | end 60 | 61 | and sample ?(rng=default_rng) ?(replace=false) ?size vs = 62 | let dst = match size with 63 | | Some n -> 64 | if vs = [||] || not replace && Array.length vs < n 65 | then invalid_arg "Base.sample: not enough elements to sample from" 66 | else Array.make n vs.(0) 67 | | None -> Array.copy vs 68 | in begin 69 | if replace 70 | then Randist.sample rng ~src:vs ~dst 71 | else Randist.choose rng ~src:vs ~dst; 72 | dst 73 | end 74 | -------------------------------------------------------------------------------- /lib/base.mli: -------------------------------------------------------------------------------- 1 | (** Tutti-frutti statistical functions. 
*) 2 | 3 | (** {e O(n)} Calculates a cumulative statistic over a given array. *) 4 | val cumulative : f:('a -> 'a -> 'a) -> 'a array -> 'a array 5 | 6 | 7 | (** {e O(log n)} Searches for the index of a given element [v] in array 8 | [vs], sorted with a given comparison function [cmp]. *) 9 | val search_sorted : cmp:('a -> 'a -> int) -> 'a array -> 'a -> int option 10 | 11 | 12 | (** {e O(n)} Reorders values in [src] into [dst], according to a given 13 | permutation of indices. *) 14 | val reorder : int array -> src:'a array -> dst:'a array -> unit 15 | 16 | 17 | (** Creates an array of integers given a semiopen range [\[a, b)]. *) 18 | val range : ?a:int -> b:int -> int array 19 | 20 | 21 | (** {e O(n)} Shuffles a given array using Fisher-Yates shuffle. *) 22 | val shuffle : ?rng:Gsl.Rng.t -> 'a array -> 'a array 23 | 24 | (** {e O(n)} Takes a sample of the specified [size] from the given 25 | array either with or without replacement. [size] defaults to the 26 | whole array. *) 27 | val sample 28 | : ?rng:Gsl.Rng.t -> ?replace:bool -> ?size:int -> 'a array -> 'a array 29 | -------------------------------------------------------------------------------- /lib/distributions.ml: -------------------------------------------------------------------------------- 1 | open Internal 2 | 3 | module Randist = Gsl.Randist 4 | module Rng = Gsl.Rng 5 | 6 | module Features = struct 7 | module type S = sig 8 | type elt 9 | type t 10 | 11 | val mean : t -> elt 12 | val variance : t -> elt 13 | val skewness : t -> elt 14 | val kurtosis : t -> elt 15 | end 16 | 17 | module type Opt = sig 18 | type elt 19 | type t 20 | 21 | val mean_opt : t -> elt option 22 | val variance_opt : t -> elt option 23 | val skewness_opt : t -> elt option 24 | val kurtosis_opt : t -> elt option 25 | end 26 | end 27 | 28 | module type BaseDistribution = sig 29 | type elt 30 | type t 31 | 32 | val random : ?rng:Rng.t -> t -> elt 33 | val sample : ?rng:Rng.t -> size:int -> t -> elt array 34 | end 35 | 36 | 
module type DiscreteDistribution = sig 37 | include BaseDistribution 38 | 39 | val cumulative_probability : t -> k:elt -> float 40 | 41 | val probability : t -> k:elt -> float 42 | val log_probability : t -> k:elt -> float 43 | end 44 | 45 | module type ContinuousDistribution = sig 46 | include BaseDistribution 47 | 48 | val cumulative_probability : t -> x:elt -> float 49 | 50 | val density : t -> x:elt -> float 51 | val log_density : t -> x:elt -> float 52 | 53 | val quantile : t -> p:float -> elt 54 | end 55 | 56 | 57 | let make_sampler random ?rng ~size d = 58 | let init = random ?rng d in 59 | let vs = Array.make size init in begin 60 | for i = 1 to size - 1 do 61 | Array.unsafe_set vs i (random ?rng d) 62 | done; vs 63 | end 64 | 65 | 66 | module Normal = struct 67 | type elt = float 68 | type t = { 69 | normal_mean : float; 70 | normal_sd : float 71 | } 72 | 73 | let create ~mean ~sd = 74 | if sd > 0. 75 | then { normal_mean = mean; normal_sd = sd } 76 | else invalid_arg "Normal.create: standard deviation must be positive" 77 | 78 | let standard = create ~mean:0. ~sd:1. 79 | 80 | let cumulative_probability { normal_mean; normal_sd } ~x = 81 | Gsl.Cdf.gaussian_P ~sigma:normal_sd ~x:(x -. normal_mean) 82 | 83 | let density { normal_mean; normal_sd } ~x = 84 | Randist.gaussian_pdf ~sigma:normal_sd (x -. normal_mean) 85 | and log_density { normal_mean; normal_sd } ~x = 86 | let z = (x -. normal_mean) /. normal_sd in 87 | -0.5 *. z *. z -. (log normal_sd +. 0.5 *. Gsl.Math.(ln2 +. lnpi)) 88 | 89 | let quantile { normal_mean; normal_sd } ~p = 90 | if p < 0. || p > 1. 91 | then invalid_arg "Normal.quantile: p must be in range [0, 1]" 92 | else Gsl.Cdf.gaussian_Pinv ~sigma:normal_sd ~p +. normal_mean 93 | 94 | let mean { normal_mean; _ } = normal_mean 95 | and variance { normal_sd; _ } = sqr normal_sd 96 | and skewness _d = 0. 97 | and kurtosis _d = 0. 98 | 99 | let random ?(rng=default_rng) { normal_mean; normal_sd } = 100 | Randist.gaussian ~sigma:normal_sd rng +. 
normal_mean
  let sample = make_sampler random

  (* MLE for the normal distribution: the sample mean and the sample
     standard deviation are the closed-form maximum-likelihood
     estimates. *)
  let mle vs =
    let mean = Sample.mean vs in
    let sd = Sample.sd ~mean vs in
    create ~mean ~sd
end

(* The log-normal distribution: [X] is log-normally distributed iff
   [log X] is normal with mean [lognormal_mean] and standard deviation
   [lognormal_sd]. *)
module LogNormal = struct
  type elt = float
  type t = {
    lognormal_mean : float;
    lognormal_sd : float
  }

  let create ~mean ~sd =
    if sd > 0.
    then { lognormal_mean = mean; lognormal_sd = sd }
    else invalid_arg "LogNormal.create: standard deviation must be positive"

  let cumulative_probability { lognormal_mean; lognormal_sd } ~x =
    Gsl.Cdf.lognormal_P ~zeta:lognormal_mean ~sigma:lognormal_sd ~x

  let density { lognormal_mean; lognormal_sd } ~x =
    Randist.lognormal_pdf ~zeta:lognormal_mean ~sigma:lognormal_sd x
  and log_density { lognormal_mean; lognormal_sd } ~x =
    (* The density is zero for non-positive [x]. *)
    if x <= 0.
    then neg_infinity
    else
      let z = (log x -. lognormal_mean) /. lognormal_sd in
      -. log x -. Gsl.Math.(ln2 +. lnpi) /. 2. -.
        log lognormal_sd -. z *. z /. 2.

  let quantile { lognormal_mean; lognormal_sd } ~p =
    if p < 0. || p > 1.
    then invalid_arg "LogNormal.quantile: p must be in range [0, 1]"
    else Gsl.Cdf.lognormal_Pinv ~zeta:lognormal_mean ~sigma:lognormal_sd ~p

  (* Note: the mean of a log-normal is [exp (mu + sigma^2 / 2)], *not*
     [exp (mu + sigma^2)]. *)
  let mean { lognormal_mean; lognormal_sd } =
    exp (lognormal_mean +. sqr lognormal_sd /. 2.)
  and variance { lognormal_mean; lognormal_sd } =
    (exp (sqr lognormal_sd) -. 1.) *.
      exp (2. *. lognormal_mean +. sqr lognormal_sd)
  (* Skewness is [(exp sd2 + 2) * sqrt (exp sd2 - 1)]; the first factor
     *adds* 2. *)
  and skewness { lognormal_sd; _ } =
    let sd2 = sqr lognormal_sd in (exp sd2 +. 2.) *. sqrt (exp sd2 -. 1.)
  and kurtosis { lognormal_sd; _ } =
    let sd2 = sqr lognormal_sd in
    exp (4. *. sd2) +. 2. *. exp (3. *. sd2) +. 3. *. exp (2. *. sd2) -. 6.
149 | 150 | let random ?(rng=default_rng) { lognormal_mean; lognormal_sd } = 151 | Randist.lognormal ~zeta:lognormal_mean ~sigma:lognormal_sd rng 152 | let sample = make_sampler random 153 | 154 | let mle vs = 155 | let log_vs = Array.map ~f:log vs in 156 | let mean = Sample.mean log_vs in 157 | let sd = Sample.sd ~mean:mean log_vs in 158 | create ~mean ~sd 159 | end 160 | 161 | module Uniform = struct 162 | type elt = float 163 | type t = { 164 | uniform_lower : float; 165 | uniform_upper : float 166 | } 167 | 168 | let create ~lower ~upper = 169 | if lower > upper 170 | then { uniform_lower = upper; uniform_upper = lower } 171 | else { uniform_lower = lower; uniform_upper = upper } 172 | 173 | let cumulative_probability { uniform_lower; uniform_upper } = 174 | Gsl.Cdf.flat_P ~a:uniform_lower ~b:uniform_upper 175 | 176 | let density { uniform_lower; uniform_upper } ~x = 177 | Randist.flat_pdf ~a:uniform_lower ~b:uniform_upper x 178 | and log_density { uniform_lower; uniform_upper } ~x = 179 | if x < uniform_lower || x > uniform_upper 180 | then neg_infinity 181 | else -. log (uniform_upper -. uniform_lower) 182 | 183 | let quantile { uniform_lower; uniform_upper } ~p = 184 | if p < 0. || p > 1. 185 | then invalid_arg "Uniform.quantile: p must be in range [0, 1]" 186 | else Gsl.Cdf.flat_Pinv ~a:uniform_lower ~b:uniform_upper ~p 187 | 188 | let mean { uniform_lower; uniform_upper } = 189 | 0.5 *. (uniform_lower +. uniform_upper) 190 | and variance { uniform_lower; uniform_upper } = 191 | sqr (uniform_upper -. uniform_lower) /. 12. 192 | and skewness _d = 0. 193 | and kurtosis _d = -6. /. 5. 
194 | 195 | let random ?(rng=default_rng) { uniform_lower; uniform_upper } = 196 | Randist.flat ~a:uniform_lower ~b:uniform_upper rng 197 | let sample = make_sampler random 198 | 199 | let mle vs = create ~lower:(Sample.min vs) ~upper:(Sample.max vs) 200 | end 201 | 202 | module Exponential = struct 203 | type elt = float 204 | type t = { exp_scale : float } 205 | 206 | let create ~scale = 207 | if scale > 0. 208 | then { exp_scale = scale } 209 | else invalid_arg "Exponential.create: scale must be positive" 210 | 211 | let cumulative_probability { exp_scale } = Gsl.Cdf.exponential_P ~mu:exp_scale 212 | 213 | let density { exp_scale } ~x = 214 | Randist.exponential_pdf ~mu:exp_scale x 215 | and log_density { exp_scale } ~x = 216 | if x < 0. 217 | then neg_infinity 218 | else -. x /. exp_scale -. log exp_scale 219 | 220 | let quantile { exp_scale } ~p = 221 | if p < 0. || p > 1. 222 | then invalid_arg "Exponential.quantile: p must be in range [0, 1]" 223 | else Gsl.Cdf.exponential_Pinv ~mu:exp_scale ~p 224 | 225 | let mean { exp_scale } = exp_scale 226 | and variance { exp_scale } = sqrt exp_scale 227 | and skewness _d = 2. 228 | and kurtosis _d = 6. 229 | 230 | let random ?(rng=default_rng) { exp_scale } = 231 | Randist.exponential ~mu:exp_scale rng 232 | let sample = make_sampler random 233 | 234 | let mle vs = create ~scale:(Sample.mean vs) 235 | end 236 | 237 | module ChiSquared = struct 238 | type elt = float 239 | type t = { chisq_df : float } 240 | 241 | let create ~df = 242 | if df <= 0 243 | then invalid_arg "ChiSquared.create: degrees of freedom must be non negative" 244 | else { chisq_df = float_of_int df } 245 | 246 | let cumulative_probability { chisq_df } = Gsl.Cdf.chisq_P ~nu:chisq_df 247 | 248 | let density { chisq_df } ~x = Randist.chisq_pdf ~nu:chisq_df x 249 | and log_density { chisq_df } ~x = 250 | if x < 0. 251 | then neg_infinity 252 | else 253 | let k2 = chisq_df /. 2. in 254 | -. k2 *. Gsl.Math.ln2 -. Gsl.Sf.lngamma k2 +. 255 | (k2 -. 1.) *. 
log x -. x /. 2. 256 | 257 | let quantile { chisq_df } ~p = 258 | if p < 0. || p > 1. 259 | then invalid_arg "ChiSquared.quantile: p must be in range [0, 1]" 260 | else Gsl.Cdf.chisq_Pinv ~nu:chisq_df ~p 261 | 262 | let mean { chisq_df } = chisq_df 263 | and variance { chisq_df } = 2. *. chisq_df 264 | and skewness { chisq_df } = sqrt (8. /. chisq_df) 265 | and kurtosis { chisq_df } = 12. /. chisq_df 266 | 267 | let random ?(rng=default_rng) { chisq_df } = 268 | Randist.chisq ~nu:chisq_df rng 269 | let sample = make_sampler random 270 | 271 | let mme vs = create ~df:(round (Sample.mean vs)) 272 | end 273 | 274 | module F = struct 275 | type elt = float 276 | type t = { 277 | f_df1 : float; 278 | f_df2 : float 279 | } 280 | 281 | let create ~df1 ~df2 = 282 | if df1 <= 0 || df2 <= 0 283 | then invalid_arg "F.create: degrees of freedom must be non negative" 284 | else { f_df1 = float_of_int df1; f_df2 = float_of_int df2 } 285 | 286 | let cumulative_probability { f_df1; f_df2 } = 287 | Gsl.Cdf.fdist_P ~nu1:f_df1 ~nu2:f_df2 288 | 289 | let density { f_df1; f_df2 } ~x = 290 | Randist.fdist_pdf ~nu1:f_df1 ~nu2:f_df2 x 291 | and log_density { f_df1; f_df2; } ~x = 292 | let f_df12 = f_df1 /. 2. 293 | and f_df22 = f_df2 /. 2. 294 | in f_df12 *. (log f_df1 +. log x) +. f_df22 *. log f_df2 -. 295 | (f_df12 +. f_df22) *. log (f_df1 *. x +. f_df2) -. 296 | log x -. Gsl.Sf.lnbeta f_df12 f_df22 297 | 298 | let quantile { f_df1; f_df2 } ~p = 299 | if p < 0. || p > 1. 300 | then invalid_arg "F.quantile: p must be in range [0, 1]" 301 | else Gsl.Cdf.fdist_Pinv ~nu1:f_df1 ~nu2:f_df2 ~p 302 | 303 | let mean_opt { f_df2; _ } = 304 | if f_df2 <= 2. 305 | then None 306 | else Some (f_df2 /. (f_df2 -. 2.)) 307 | and variance_opt { f_df1; f_df2 } = 308 | if f_df2 <= 4. 309 | then None 310 | else Some (2. *. sqr f_df2 *. (f_df1 +. f_df2 -. 2.) /. 311 | (f_df1 *. sqr (f_df2 -. 2.) *. (f_df2 -. 4.))) 312 | and skewness_opt { f_df1; f_df2 } = 313 | if f_df2 <= 6. 
314 | then None 315 | else Some ((2. *. f_df1 +. f_df2 -. 2.) *. sqrt (8. *. (f_df2 -. 4.)) /. 316 | ((f_df2 -. 6.) *. sqrt (f_df1 *. (f_df1 +. f_df2 -. 2.)))) 317 | and kurtosis_opt { f_df1; f_df2 } = 318 | if f_df2 <= 8. 319 | then None 320 | else Some (12. *. 321 | (f_df1 *. (5. *. f_df2 -. 22.) *. (f_df1 +. f_df2 -. 2.) +. 322 | (f_df2 -. 4.) *. sqr (f_df2 -. 2.)) /. 323 | (f_df1 *. (f_df2 -. 6.) *. (f_df2 -. 8.) *. 324 | (f_df1 +. f_df2 -. 2.))) 325 | 326 | let random ?(rng=default_rng) { f_df1; f_df2 } = 327 | Randist.fdist ~nu1:f_df1 ~nu2:f_df2 rng 328 | let sample = make_sampler random 329 | 330 | let mme vs = 331 | let mean = Sample.mean vs in 332 | let variance = Sample.variance ~mean vs in 333 | let df1 = 334 | -2 * round (sqr mean /. (sqr mean *. (mean -. 1.) +. 335 | mean *. variance -. 2. *. variance)) 336 | and df2 = 2 * round (mean /. (mean -. 1.)) in 337 | create ~df1 ~df2 338 | end 339 | 340 | module T = struct 341 | type elt = float 342 | type t = { t_df : float } 343 | 344 | let create ~df = 345 | if df <= 0. 346 | then invalid_arg "T.create: degrees of freedom must be non negative" 347 | else { t_df = df } 348 | 349 | let cumulative_probability { t_df } = Gsl.Cdf.tdist_P ~nu:t_df 350 | 351 | let density { t_df } ~x = Randist.tdist_pdf ~nu:t_df x 352 | and log_density { t_df } ~x = 353 | Gsl.Sf.lngamma ((t_df +. 1.) /. 2.) -. 354 | (log t_df +. Gsl.Math.lnpi) /. 2. -. 355 | Gsl.Sf.lngamma (t_df /. 2.) -. 356 | (t_df +. 1.) /. 2. *. Gsl.Sf.log_1plusx (x *. x /. t_df) 357 | 358 | let quantile { t_df } ~p = 359 | if p < 0. || p > 1. 360 | then invalid_arg "T.quantile: p must be in range [0, 1]" 361 | else Gsl.Cdf.tdist_Pinv ~nu:t_df ~p 362 | 363 | let mean_opt { t_df } = if t_df > 0. then Some 0. else None 364 | and variance_opt { t_df } = 365 | if t_df > 2. 366 | then Some (t_df /. (t_df -. 2.)) 367 | else if t_df > 1. 368 | then Some infinity 369 | else None 370 | and skewness_opt { t_df } = 371 | if t_df > 3. 372 | then Some 0. 
373 | else None 374 | and kurtosis_opt { t_df } = 375 | if t_df > 4. 376 | then Some (6. /. (t_df -. 4.)) 377 | else if t_df > 2. 378 | then Some infinity 379 | else None 380 | 381 | let random ?(rng=default_rng) { t_df } = Randist.tdist ~nu:t_df rng 382 | let sample = make_sampler random 383 | 384 | let mme vs = 385 | let variance = Sample.variance vs in 386 | if abs_float variance = infinity 387 | then invalid_arg "T.mme: infinite sample" 388 | else create ~df:(2. *. variance /. (variance -. 1.)) 389 | end 390 | 391 | module Gamma = struct 392 | type elt = float 393 | type t = { 394 | gamma_shape : float; 395 | gamma_scale : float 396 | } 397 | 398 | let create ~shape ~scale = 399 | if shape <= 0. 400 | then invalid_arg "Gamma.create: shape must be positive" 401 | else if scale <= 0. 402 | then invalid_arg "Gamma.create: scale must be positive" 403 | else { gamma_shape = shape; gamma_scale = scale } 404 | 405 | let cumulative_probability { gamma_shape; gamma_scale } = 406 | Gsl.Cdf.gamma_P ~a:gamma_shape ~b:gamma_scale 407 | 408 | let density { gamma_shape; gamma_scale } ~x = 409 | Randist.gamma_pdf ~a:gamma_shape ~b:gamma_scale x 410 | and log_density { gamma_shape; gamma_scale } ~ x = 411 | if x <= 0. 412 | then neg_infinity 413 | else 414 | -. Gsl.Sf.lngamma gamma_shape -. gamma_shape *. log gamma_scale +. 415 | (gamma_shape -. 1.) *. log x -. x /. gamma_scale 416 | 417 | let quantile { gamma_shape; gamma_scale } ~p = 418 | if p < 0. || p > 1. 419 | then invalid_arg "Gamma.quantile: p must be in range [0, 1]" 420 | else Gsl.Cdf.gamma_Pinv ~a:gamma_shape ~b:gamma_scale ~p 421 | 422 | let mean { gamma_shape; gamma_scale } = gamma_shape *. gamma_scale 423 | and variance { gamma_shape; gamma_scale } = gamma_shape *. sqr gamma_scale 424 | and skewness { gamma_shape; _ } = 2. /. sqrt gamma_shape 425 | and kurtosis { gamma_shape; _ } = 6. /. 
gamma_shape 426 | 427 | let random ?(rng=default_rng) { gamma_shape; gamma_scale } = 428 | Randist.gamma ~a:gamma_shape ~b:gamma_scale rng 429 | let sample = make_sampler random 430 | 431 | let mme vs = 432 | let mean = Sample.mean vs in 433 | let variance = Sample.variance ~mean vs in 434 | create ~shape:(sqr mean /. variance) ~scale:(mean /. variance) 435 | 436 | let mle ~n_iter ~epsilon vs = 437 | let log_mean = log (Sample.mean vs) 438 | and mean_log = Sample.mean (Array.map ~f:log vs) in 439 | let s = log_mean -. mean_log in 440 | 441 | let f logk = logk -. Gsl.Sf.psi (exp logk) -. s 442 | and df logk = 1. -. exp logk *. Gsl.Sf.psi_1 (exp logk) in 443 | 444 | let logk = find_root_newton 445 | ~n_iter ~epsilon 446 | ~init:(log (3. -. s +. sqrt (sqr (s -. 3.) +. 24. *. s)) -. 447 | log 12. -. log s) 448 | Gsl.Fun.({ f; df; fdf = fun logk -> (f logk, df logk) }) 449 | in create ~shape:(exp logk) ~scale:(Sample.mean vs /. exp logk) 450 | end 451 | 452 | module Cauchy = struct 453 | type elt = float 454 | type t = { 455 | cauchy_location : float; 456 | cauchy_scale : float 457 | } 458 | 459 | let create ~location ~scale = 460 | if scale <= 0. 461 | then invalid_arg "Cauchy.create: scale must be positive" 462 | else { cauchy_location = location; cauchy_scale = scale } 463 | 464 | let standard = create ~location:0. ~scale:1. 465 | 466 | let cumulative_probability { cauchy_location; cauchy_scale } ~x = 467 | Gsl.Cdf.cauchy_P ~a:cauchy_scale ~x:(x -. cauchy_location) 468 | 469 | let density { cauchy_location; cauchy_scale } ~x = 470 | Randist.cauchy_pdf ~a:cauchy_scale (x -. cauchy_location) 471 | and log_density { cauchy_location; cauchy_scale } ~x = 472 | let z = (x -. cauchy_location) /. cauchy_scale in 473 | -. (Gsl.Math.lnpi +. log cauchy_scale +. Gsl.Sf.log_1plusx (z *. z)) 474 | 475 | let quantile { cauchy_location; cauchy_scale } ~p = 476 | if p < 0. || p > 1. 
477 | then invalid_arg "Cauchy.quantile: p must be in range [0, 1]" 478 | else Gsl.Cdf.cauchy_Pinv ~a:cauchy_scale ~p +. cauchy_location 479 | 480 | let random ?(rng=default_rng) { cauchy_location; cauchy_scale } = 481 | Randist.cauchy ~a:cauchy_scale rng +. cauchy_location 482 | let sample = make_sampler random 483 | end 484 | 485 | module Beta = struct 486 | type elt = float 487 | type t = { 488 | beta_alpha : float; 489 | beta_beta : float 490 | } 491 | 492 | let create ~alpha ~beta = 493 | if alpha <= 0. || beta <= 0. 494 | then invalid_arg "Beta.create: shape parameters must be positive" 495 | else { beta_alpha = alpha; beta_beta = beta } 496 | 497 | let cumulative_probability { beta_alpha; beta_beta } = 498 | Gsl.Cdf.beta_P ~a:beta_alpha ~b:beta_beta 499 | 500 | let density { beta_alpha; beta_beta } ~x = 501 | Randist.beta_pdf ~a:beta_alpha ~b:beta_beta x 502 | and log_density { beta_alpha; beta_beta } ~x = 503 | if x < 0. || x > 1. 504 | then neg_infinity 505 | else 506 | (beta_alpha -. 1.) *. log x +. (beta_beta -. 1.) *. log (1. -. x) -. 507 | Gsl.Sf.lnbeta beta_alpha beta_beta 508 | 509 | let quantile { beta_alpha; beta_beta } ~p = 510 | if p < 0. || p > 1. 511 | then invalid_arg "Beta.quantile: p must be in range [0, 1]" 512 | else Gsl.Cdf.beta_Pinv ~a:beta_alpha ~b:beta_beta ~p 513 | 514 | let mean { beta_alpha = a; beta_beta = b } = a /. (a +. b) 515 | and variance { beta_alpha = a; beta_beta = b} = 516 | a *. b /. (sqr (a +. b) *. (a +. b +. 1.)) 517 | and skewness { beta_alpha = a; beta_beta = b } = 518 | 2. *. (b -. a) *. sqrt (a +. b +. 1.) /. ((a +. b +. 2.) *. sqrt (a *. b)) 519 | and kurtosis { beta_alpha = a; beta_beta = b } = 520 | 6. *. (sqr (a -. b) *. (a +. b +. 1.) -. a *. b *. (a +. b +. 2.)) /. 521 | (a *. b *. (a +. b +. 2.) *. (a +. b +. 
3.))

  let random ?(rng=default_rng) { beta_alpha; beta_beta } =
    Randist.beta ~a:beta_alpha ~b:beta_beta rng
  let sample = make_sampler random

  (* Method of moments: alpha = m (m (1 - m) / v - 1) and
     beta = (1 - m) (m (1 - m) / v - 1), where [m] and [v] are the
     sample mean and variance.  Both shape parameters share a common
     factor, which is positive exactly when v < m (1 - m).  The
     previous version negated this factor for [alpha] (it used
     [m^2 - m + v] instead of [m - m^2 - v]), so [create] always
     rejected the resulting negative estimate; the [beta] expression is
     algebraically unchanged. *)
  let mme vs =
    let mean = Sample.mean vs in
    let variance = Sample.variance ~mean vs in
    let factor = mean *. (1. -. mean) /. variance -. 1. in
    create ~alpha:(mean *. factor) ~beta:((1. -. mean) *. factor)
end

(* The logistic distribution with the given location and scale. *)
module Logistic = struct
  type elt = float
  type t = {
    logistic_location : float;
    logistic_scale : float
  }

  let create ~location ~scale =
    if scale <= 0.
    then invalid_arg "Logistic.create: scale must be positive"
    else { logistic_location = location; logistic_scale = scale }

  let cumulative_probability { logistic_location; logistic_scale } ~x =
    Gsl.Cdf.logistic_P ~a:logistic_scale ~x:(x -. logistic_location)

  let density { logistic_location; logistic_scale } ~x =
    Randist.logistic_pdf ~a:logistic_scale (x -. logistic_location)
  and log_density { logistic_location; logistic_scale } ~x =
    let z = (x -. logistic_location) /. logistic_scale in
    -. z -. log logistic_scale -. 2. *. (Gsl.Sf.log_1plusx (exp (-. z)))

  let quantile { logistic_location; logistic_scale } ~p =
    if p < 0. || p > 1.
    then invalid_arg "Logistic.quantile: p must be in range [0, 1]"
    else Gsl.Cdf.logistic_Pinv ~a:logistic_scale ~p +. logistic_location

  let mean { logistic_location; _ } = logistic_location
  and variance { logistic_scale; _ } =
    sqr (logistic_scale *. Gsl.Math.pi) /. 3.
  and skewness _d = 0.
  and kurtosis _d = 1.2

  let random ?(rng=default_rng) { logistic_location; logistic_scale } =
    Randist.logistic ~a:logistic_scale rng +.
logistic_location 568 | let sample = make_sampler random 569 | 570 | let mme vs = 571 | let mean = Sample.mean vs in 572 | let variance = Sample.variance ~mean vs in 573 | let open Gsl.Math in 574 | create ~location:mean ~scale:(sqrt (3. *. variance) /. pi) 575 | end 576 | 577 | 578 | module Poisson = struct 579 | type elt = int 580 | type t = { poisson_rate : float } 581 | 582 | let create ~rate = 583 | if rate > 0. 584 | then { poisson_rate = rate } 585 | else invalid_arg "Poisson.create: rate must be positive" 586 | 587 | let cumulative_probability { poisson_rate } ~k = 588 | Gsl.Cdf.poisson_P ~mu:poisson_rate ~k 589 | 590 | let probability { poisson_rate } ~k = 591 | Randist.poisson_pdf ~mu:poisson_rate k 592 | and log_probability { poisson_rate } ~k = 593 | if k < 0 594 | then neg_infinity 595 | else 596 | float k *. log poisson_rate -. 597 | Gsl.Sf.lngamma (float (k + 1)) -. poisson_rate 598 | 599 | let mean { poisson_rate } = poisson_rate 600 | and variance { poisson_rate } = poisson_rate 601 | and skewness { poisson_rate } = 1. /. sqrt poisson_rate 602 | and kurtosis { poisson_rate } = 1. /. poisson_rate 603 | 604 | let random ?(rng=default_rng) { poisson_rate } = 605 | Randist.poisson ~mu:poisson_rate rng 606 | let sample = make_sampler random 607 | 608 | let mle vs = create ~rate:(Sample.mean (Array.map ~f:float_of_int vs)) 609 | end 610 | 611 | module Bernoulli = struct 612 | type elt = int 613 | type t = { bernoulli_p : float } 614 | 615 | let create ~p = 616 | if p > 1.0 || p < 0. 617 | then invalid_arg "Bernoulli.create: probability must be in range [0, 1]" 618 | else { bernoulli_p = p } 619 | 620 | let cumulative_probability { bernoulli_p = p } ~k = 621 | if k < 0 622 | then 0. 623 | else if k < 1 624 | then 1. -. p 625 | else 1. 626 | 627 | let probability { bernoulli_p = p } ~k = 628 | if k = 0 629 | then 1. -. p 630 | else if k = 1 631 | then p 632 | else 0. 
633 | let log_probability d ~k = log (probability d ~k) 634 | 635 | let mean { bernoulli_p = p } = p 636 | and variance { bernoulli_p = p } = p *. (1. -. p) 637 | and skewness { bernoulli_p = p } = 638 | (1. -. 2. *. p) /. sqrt (p *. (1. -. p)) 639 | and kurtosis { bernoulli_p = p } = 640 | (1. -. 6. *. p *. (1. -. p)) /. (p *. (1. -. p)) 641 | 642 | let random ?(rng=default_rng) { bernoulli_p } = 643 | Randist.bernoulli ~p:bernoulli_p rng 644 | let sample = make_sampler random 645 | 646 | let mle vs = 647 | let n = Array.length vs 648 | and y = Array.fold_left ~f:(+) ~init:0 vs in 649 | create ~p:(float_of_int y /. float_of_int n) 650 | end 651 | 652 | module Binomial = struct 653 | type elt = int 654 | type t = { 655 | binomial_trials : int; 656 | binomial_p : float 657 | } 658 | 659 | let create ~trials ~p = 660 | if trials < 0 661 | then invalid_arg "Binomial.create: number of trials must be non negative" 662 | else if p > 1.0 || p < 0. 663 | then invalid_arg "Binomial.create: probability must be in range [0, 1]" 664 | else { binomial_trials = trials; binomial_p = p } 665 | 666 | let cumulative_probability { binomial_trials = n; binomial_p = p } ~k = 667 | Gsl.Cdf.binomial_P ~n ~p ~k 668 | 669 | let probability { binomial_trials = n; binomial_p = p } ~k = 670 | Randist.binomial_pdf ~n ~p k 671 | and log_probability { binomial_trials = n; binomial_p = p } ~k = 672 | if k < 0 || k > n 673 | then neg_infinity 674 | else 675 | Gsl.Sf.lnchoose n k +. float k *. log p +. float (n - k) *. log (1. -. p) 676 | 677 | let mean { binomial_trials = n; binomial_p = p } = float_of_int n *. p 678 | and variance { binomial_trials = n; binomial_p = p } = 679 | float_of_int n *. p *. (1. -. p) 680 | and skewness { binomial_trials = n; binomial_p = p } = 681 | (1. -. 2. *. p) /. sqrt (float_of_int n *. p *. (1. -. p)) 682 | and kurtosis { binomial_trials = n; binomial_p = p } = 683 | (1. -. 6. *. p *. (1. -. p)) /. (float_of_int n *. p *. (1. -. 
p)) 684 | 685 | let random ?(rng=default_rng) { binomial_trials; binomial_p } = 686 | Randist.binomial ~n:binomial_trials ~p:binomial_p rng 687 | let sample = make_sampler random 688 | 689 | let mme vs = 690 | let vs = Array.map ~f:float_of_int vs in 691 | let mean = Sample.mean vs in 692 | let variance = Sample.variance ~mean vs in 693 | create ~trials:(round (sqr mean /. (mean -. variance))) 694 | ~p:(1. -. variance /. mean) 695 | end 696 | 697 | module Geometric = struct 698 | type elt = int 699 | type t = { geometric_p : float } 700 | 701 | let create ~p = 702 | if p > 1.0 || p <= 0. 703 | then invalid_arg "Geometric.create: probability must be in range (0, 1]" 704 | else { geometric_p = p } 705 | 706 | let cumulative_probability { geometric_p = p } ~k = 707 | Gsl.Cdf.geometric_P ~p ~k 708 | 709 | let probability { geometric_p = p } ~k = Randist.geometric_pdf ~p k 710 | and log_probability { geometric_p = p } ~k = 711 | if k <= 0 712 | then neg_infinity 713 | else float (k - 1) *. log (1. -. p) +. log p 714 | 715 | let mean { geometric_p } = 1. /. geometric_p 716 | and variance { geometric_p } = 717 | (1. -. geometric_p) /. sqr geometric_p 718 | and skewness { geometric_p } = 719 | (2. -. geometric_p) /. sqrt (1. -. geometric_p) 720 | and kurtosis { geometric_p } = 721 | 6. +. sqr geometric_p /. (1. -. geometric_p) 722 | 723 | let random ?(rng=default_rng) { geometric_p } = 724 | Randist.geometric ~p:geometric_p rng 725 | let sample = make_sampler random 726 | 727 | let mme vs = 728 | let n = Array.length vs in 729 | let p = float_of_int n /. 
730 | float_of_int (Array.fold_left ~f:(+) ~init:0 vs) 731 | in create ~p 732 | end 733 | 734 | module Hypergeometric = struct 735 | type elt = int 736 | type t = { 737 | hyper_m : int; 738 | hyper_t : int; 739 | hyper_k : int 740 | } 741 | 742 | let create ~m ~t ~k = 743 | if t < 0 744 | then invalid_arg "Hypergeometric.create: t must be non negative" 745 | else if m < 0 || m > t 746 | then invalid_arg "Hypergeometric.create: m must be in range [0, t]" 747 | else if k <= 0 || k > t 748 | then invalid_arg "Hypergeometric.create: k must be in range (0, t]" 749 | else { hyper_m = m; hyper_t = t; hyper_k = k } 750 | 751 | let cumulative_probability { hyper_m; hyper_t; hyper_k } ~k = 752 | Gsl.Cdf.hypergeometric_P ~n1:hyper_m ~n2:(hyper_t - hyper_m) ~t:hyper_k ~k 753 | 754 | let probability { hyper_m; hyper_t; hyper_k } ~k = 755 | Randist.hypergeometric_pdf ~n1:hyper_m ~n2:(hyper_t - hyper_m) ~t:hyper_k k 756 | and log_probability { hyper_m; hyper_t; hyper_k } ~k = 757 | if k < max 0 (hyper_k - hyper_t + hyper_m) || k > min hyper_m hyper_k 758 | then neg_infinity 759 | else 760 | Gsl.Sf.(lnchoose hyper_m k +. 761 | lnchoose (hyper_t - hyper_m) (hyper_k - k) -. 762 | lnchoose hyper_t hyper_k) 763 | 764 | let mean { hyper_m; hyper_t; hyper_k } = 765 | float_of_int (hyper_k * hyper_m) /. float_of_int hyper_t 766 | and variance { hyper_m; hyper_t; hyper_k } = 767 | let m = float_of_int hyper_m 768 | and t = float_of_int hyper_t 769 | and k = float_of_int hyper_k 770 | in (k *. m /. t) *. (1. -. m /. t) *. (t -. k) /. (t -. 1.) 771 | and skewness { hyper_m; hyper_t; hyper_k } = 772 | let m = float_of_int hyper_m 773 | and t = float_of_int hyper_t 774 | and k = float_of_int hyper_k 775 | in (t -. 2. *. m) *. sqrt (t -. 1.) *. (t -. 2. *. k) /. 776 | (sqrt (k *. m *. (t -. m) *. (t -. k)) *. (t -. 2.)) 777 | and kurtosis { hyper_m; hyper_t; hyper_k } = 778 | let m = float_of_int hyper_m 779 | and t = float_of_int hyper_t 780 | and k = float_of_int hyper_k 781 | in ((t -. 1.) 
*. sqr t *.
        (t *. (t +. 1.) -.
           6. *. m *. (t -. m) -.
           6. *. k *. (t -. k)) +.
        6. *. k *. m *. (t -. m) *. (t -. k) *. (5. *. t -. 6.)) /.
       (k *. m *. (t -. m) *. (t -. k) *. (t -. 2.) *. (t -. 3.))

  let random ?(rng=default_rng) { hyper_m; hyper_t; hyper_k } =
    Randist.hypergeometric ~n1:hyper_m ~n2:(hyper_t - hyper_m) ~t:hyper_k rng
  let sample = make_sampler random
end

(* The negative binomial distribution: the number of successes before
   [failures] failures, where each trial succeeds with probability [p].
   GSL uses the mirrored parameterization, hence [1. -. p] in the calls
   below. *)
module NegativeBinomial = struct
  type elt = int
  type t = {
    nbinomial_failures : float;
    nbinomial_p : float
  }

  let create ~failures ~p =
    if failures < 0.
    then invalid_arg ("NegativeBinomial.create: number of failures must " ^
                      "be non negative")
    else if p >= 1.0 || p <= 0.
    then invalid_arg "NegativeBinomial.create: probability must be in range (0, 1)"
    else { nbinomial_failures = failures; nbinomial_p = p }

  let cumulative_probability { nbinomial_failures = r; nbinomial_p = p } ~k =
    Gsl.Cdf.negative_binomial_P ~n:r ~p:(1. -. p) ~k

  let probability { nbinomial_failures = r; nbinomial_p = p } ~k =
    Randist.negative_binomial_pdf ~n:r ~p:(1. -. p) k
  and log_probability { nbinomial_failures = r; nbinomial_p = p } ~k =
    if k < 0
    then neg_infinity
    else
      Gsl.Sf.lngamma (float k +. r) -.
      Gsl.Sf.lngamma (float (k + 1)) -.
      Gsl.Sf.lngamma r +. float k *. log p +. r *. log (1. -. p)

  (* In this parameterization the mean is [r p / (1 - p)] and the
     variance is [r p / (1 - p)^2]: it *divides* by [sqr (1. -. p)],
     matching GSL's [n (1 - P) / P^2] with [P = 1. -. p]. *)
  let mean { nbinomial_failures = r; nbinomial_p = p } =
    r *. p /. (1. -. p)
  and variance { nbinomial_failures = r; nbinomial_p = p } =
    r *. p /. sqr (1. -. p)
  and skewness { nbinomial_failures = r; nbinomial_p = p } =
    (1. +. p) /. sqrt (r *. p)
  (* Excess kurtosis is [6 / r + (1 - p)^2 / (r p)] in this
     parameterization: the numerator is [sqr (1. -. p)], not
     [sqr (1. +. p)]. *)
  and kurtosis { nbinomial_failures = r; nbinomial_p = p } =
    6. /. r +. sqr (1. -. p) /. (r *.
p) 829 | 830 | let random ?(rng=default_rng) { nbinomial_failures; nbinomial_p } = 831 | Randist.negative_binomial ~n:nbinomial_failures ~p:(1. -. nbinomial_p) rng 832 | let sample = make_sampler random 833 | 834 | let mme vs = 835 | let vs = Array.map ~f:float_of_int vs in 836 | let mean = Sample.mean vs in 837 | let variance = Sample.variance ~mean vs in 838 | create 839 | ~failures:(sqr mean /. (variance -. mean)) 840 | ~p:(1. -. mean /. variance) 841 | end 842 | 843 | module Categorical = struct 844 | module type OrderedType = Map.OrderedType 845 | 846 | module type S = sig 847 | include DiscreteDistribution 848 | 849 | val create : (elt * float) array -> t 850 | 851 | val mle : elt array -> t 852 | end 853 | 854 | module Make (Elt : OrderedType) = struct 855 | type elt = Elt.t 856 | type t = { 857 | categorical_values : elt array; 858 | categorical_probs : float array; 859 | categorical_cumsum : Randist.discrete 860 | } 861 | 862 | let create dist = 863 | let n = Array.length dist 864 | and is = 865 | Array.sort_index dist 866 | ~cmp:(fun (v1, _p1) (v2, _p2) -> Elt.compare v1 v2) 867 | in if n = 0 then invalid_arg "Categorical.Make: no data"; 868 | 869 | let (v0, p0) = Array.(unsafe_get dist (unsafe_get is 0)) in 870 | let vs = Array.make n v0 871 | and probs = Array.make n p0 in begin 872 | for i = 1 to n - 1 do 873 | let (v, p) = Array.(unsafe_get dist (unsafe_get is i)) in 874 | Array.unsafe_set vs i v; 875 | Array.unsafe_set probs i p 876 | done; 877 | 878 | (* Note(superbobry): ideally, we should check that given 879 | probabilities sum up to 1., but I don't see how to do this 880 | for floating point numbers. 
*) 881 | { 882 | categorical_values = vs; 883 | categorical_probs = probs; 884 | categorical_cumsum = Randist.discrete_preproc probs 885 | } 886 | end 887 | 888 | let cumulative_probability { categorical_values; categorical_probs; _ } ~k = 889 | match Base.search_sorted ~cmp:Elt.compare categorical_values k with 890 | | Some pos -> 891 | let acc = ref 0. in begin 892 | for i = 0 to pos do 893 | acc := !acc +. Array.unsafe_get categorical_probs i 894 | done 895 | end; !acc 896 | | None -> 897 | if Elt.compare k (Array.unsafe_get categorical_values 0) < 0 898 | then 0. 899 | else 900 | (* Then it must be the case that forall vs : v > n. *) 901 | 1. 902 | 903 | let probability { categorical_values; categorical_probs; _ } ~k = 904 | match Base.search_sorted ~cmp:Elt.compare categorical_values k with 905 | | Some pos -> Array.unsafe_get categorical_probs pos 906 | | None -> 0. 907 | let log_probability d ~k = log (probability d ~k) 908 | 909 | let random ?(rng=default_rng) 910 | { categorical_values; categorical_cumsum; _ } = 911 | let pos = Randist.discrete rng categorical_cumsum in 912 | Array.unsafe_get categorical_values pos 913 | let sample = make_sampler random 914 | 915 | let mle vs = 916 | let n = Array.length vs in 917 | if n = 0 918 | then invalid_arg "Categorical.Make.mle: no data" 919 | else 920 | let counts = Hashtbl.create 8 in begin 921 | for i = 0 to n - 1 do 922 | let v = Array.unsafe_get vs i in 923 | if Hashtbl.mem counts v 924 | then Hashtbl.replace counts v (Hashtbl.find counts v + 1) 925 | else Hashtbl.add counts v 1 926 | done; 927 | 928 | let dist = Hashtbl.fold (fun k v dist -> 929 | ((k, float_of_int v /. 
float_of_int n) :: dist)) counts [] 930 | in create (Array.of_list dist) 931 | end 932 | end 933 | end 934 | 935 | 936 | let normal = Normal.create 937 | and log_normal = LogNormal.create 938 | and uniform = Uniform.create 939 | and exponential = Exponential.create 940 | and chi_squared = ChiSquared.create 941 | and f = F.create 942 | and t = T.create 943 | and gamma = Gamma.create 944 | and cauchy = Cauchy.create 945 | and beta = Beta.create 946 | and logistic = Logistic.create 947 | 948 | let poisson = Poisson.create 949 | and bernoulli = Bernoulli.create 950 | and binomial = Binomial.create 951 | and geometric = Geometric.create 952 | and hypergeometric = Hypergeometric.create 953 | and negative_binomial = NegativeBinomial.create 954 | -------------------------------------------------------------------------------- /lib/distributions.mli: -------------------------------------------------------------------------------- 1 | (** Commonly used probability distributions. *) 2 | 3 | module Features : sig 4 | (** Distribution features. *) 5 | module type S = sig 6 | type elt 7 | type t 8 | 9 | val mean : t -> elt 10 | val variance : t -> elt 11 | val skewness : t -> elt 12 | val kurtosis : t -> elt 13 | end 14 | 15 | (** Distribution features, which are allowed to be undefined for some 16 | combinations of distribution parameters. *) 17 | module type Opt = sig 18 | type elt 19 | type t 20 | 21 | val mean_opt : t -> elt option 22 | val variance_opt : t -> elt option 23 | val skewness_opt : t -> elt option 24 | val kurtosis_opt : t -> elt option 25 | end 26 | end 27 | 28 | module type DiscreteDistribution = sig 29 | type elt 30 | type t 31 | 32 | (** Samples a single point from the distribution. *) 33 | val random : ?rng:Gsl.Rng.t -> t -> elt 34 | 35 | (** Samples [size] data points from the distribution. *) 36 | val sample : ?rng:Gsl.Rng.t -> size:int -> t -> elt array 37 | 38 | (** Computes cumulative probability function for a given value [k], 39 | i. e. 
[P(X <= k)], the probability that a random variable [X] will 40 | be found at a value less than or equal to [k]. *) 41 | val cumulative_probability : t -> k:elt -> float 42 | 43 | (** Computes probability mass function for a given value [k], i. e. 44 | [P(X = k)], the probability that a random variable [X] is 45 | {b exactly} equal to [k] *) 46 | val probability : t -> k:elt -> float 47 | 48 | (** Computes natural logarithm of the probability mass function for 49 | a given value [k]. *) 50 | val log_probability : t -> k:elt -> float 51 | end 52 | 53 | module type ContinuousDistribution = sig 54 | type elt 55 | type t 56 | 57 | (** Samples a single point from the distribution. *) 58 | val random : ?rng:Gsl.Rng.t -> t -> elt 59 | 60 | (** Samples [size] data points from the distribution. *) 61 | val sample : ?rng:Gsl.Rng.t -> size:int -> t -> elt array 62 | 63 | (** Computes cumulative probability function for a given value [n], 64 | i. e. [P(X <= n)], the probability that a random variable [X] will 65 | be found at a value less than or equal to [n]. *) 66 | val cumulative_probability : t -> x:elt -> float 67 | 68 | (** Computes probability density function for a given value [n], i. e. 69 | [P(X = n)], the probability that a random variable [X] is 70 | {b exactly} equal to [n] *) 71 | val density : t -> x:elt -> float 72 | 73 | (** Computes natural logarithm of the probability density function for 74 | a given value [n]. *) 75 | val log_density : t -> x:elt -> float 76 | 77 | (** Computes inverse cumulative probability function for a given 78 | probability [p]. *) 79 | val quantile : t -> p:float -> elt 80 | end 81 | 82 | 83 | (** {2 Continuous distributions} *) 84 | 85 | (** The normal distribution. 
*) 86 | module Normal : sig 87 | type t = { 88 | normal_mean : float; 89 | normal_sd : float 90 | } 91 | 92 | include ContinuousDistribution with type t := t and type elt = float 93 | include Features.S with type t := t and type elt := float 94 | 95 | (** Creates normal distribution from parameters. *) 96 | val create : mean:float -> sd:float -> t 97 | 98 | (** Standard normal distribution with 0 [mean] and [sd] equal to 1. *) 99 | val standard : t 100 | 101 | (** Creates normal distribution with a MLE of parameters, estimated 102 | from given data. *) 103 | val mle : float array -> t 104 | end 105 | 106 | (** The log-normal distribution. *) 107 | module LogNormal : sig 108 | type t = { 109 | lognormal_mean : float; 110 | lognormal_sd : float 111 | } 112 | 113 | include ContinuousDistribution with type t := t and type elt = float 114 | include Features.S with type t := t and type elt := float 115 | 116 | (** Creates log-normal distribution from parameters. *) 117 | val create : mean:float -> sd:float -> t 118 | 119 | (** Creates log-normal distribution with a MLE of parameters, estimated 120 | from given data. *) 121 | val mle : float array -> t 122 | end 123 | 124 | (** Random variate distributed uniformly in the interval. *) 125 | module Uniform : sig 126 | type t = { 127 | uniform_lower : float; 128 | uniform_upper : float 129 | } 130 | 131 | include ContinuousDistribution with type t := t and type elt = float 132 | include Features.S with type t := t and type elt := float 133 | 134 | (** Creates uniform distribution over a given interval. *) 135 | val create : lower:float -> upper:float -> t 136 | 137 | (** Creates uniform distribution with a MLE of parameters, estimated 138 | from given data. *) 139 | val mle : float array -> t 140 | end 141 | 142 | (** The exponential distribution. 
143 | 144 | The probability distribution of the times between events in a Poisson 145 | process, in which events occur continuously and independently at a 146 | constant average [rate]. *) 147 | module Exponential : sig 148 | type t = { exp_scale : float } 149 | 150 | include ContinuousDistribution with type t := t and type elt = float 151 | include Features.S with type t := t and type elt := float 152 | 153 | (** Creates exponential distribution. [scale] must be positive. *) 154 | val create : scale:float -> t 155 | 156 | (** Creates exponential distribution with a MLE of parameters, estimated 157 | from given data. *) 158 | val mle : float array -> t 159 | end 160 | 161 | (** The chi-squared distribution. 162 | 163 | The probability distribution of sum of squares of [df] independent 164 | standard normal distributions. *) 165 | module ChiSquared : sig 166 | type t = { chisq_df : float } 167 | 168 | include ContinuousDistribution with type t := t and type elt = float 169 | include Features.S with type t := t and type elt := float 170 | 171 | (** Creates chi-squared distribution. Number of degrees of freedom 172 | must be positive. *) 173 | val create : df:int -> t 174 | 175 | (** Creates chi-squared distribution with parameters, estimated with 176 | method of moments. *) 177 | val mme : float array -> t 178 | end 179 | 180 | (** Fisher-Snedecor distribution. *) 181 | module F : sig 182 | type t = { 183 | f_df1 : float; 184 | f_df2 : float 185 | } 186 | 187 | include ContinuousDistribution with type t := t and type elt = float 188 | include Features.Opt with type t := t and type elt := float 189 | 190 | (** Creates Fisher-Snedecor distribution with a given number of degrees 191 | of freedom. *) 192 | val create : df1:int -> df2:int -> t 193 | 194 | (** Creates Fisher-Snedecor distribution with parameters, estimated 195 | with method of moments. *) 196 | val mme : float array -> t 197 | end 198 | 199 | (** Student's t-distribution. 
*) 200 | module T : sig 201 | type t = { t_df : float } 202 | 203 | include ContinuousDistribution with type t := t and type elt = float 204 | include Features.Opt with type t := t and type elt := float 205 | 206 | (** Creates Student's t-distribution with a given number of degrees 207 | of freedom. *) 208 | val create : df:float -> t 209 | 210 | (** Creates Student's t-distribution with parameters, estimated with 211 | method of moments. *) 212 | val mme : float array -> t 213 | end 214 | 215 | (** The gamma distribution. *) 216 | module Gamma : sig 217 | type t = { 218 | gamma_shape : float; 219 | gamma_scale : float 220 | } 221 | 222 | include ContinuousDistribution with type t := t and type elt = float 223 | include Features.S with type t := t and type elt := float 224 | 225 | (** Creates gamma distribution. Both shape and scale must be positive. *) 226 | val create : shape:float -> scale:float -> t 227 | 228 | (** Creates gamma distribution with parameters, estimated with method 229 | of moments. *) 230 | val mme : float array -> t 231 | 232 | (** Creates gamma distribution with a MLE of parameters, estimated 233 | from given data. *) 234 | val mle : n_iter:int -> epsilon:float -> float array -> t 235 | end 236 | 237 | (** The Cauchy-Lorentz distribution. 238 | 239 | It doesn't have mean and variance. *) 240 | module Cauchy : sig 241 | type t = { 242 | cauchy_location : float; 243 | cauchy_scale : float 244 | } 245 | 246 | include ContinuousDistribution with type t := t and type elt = float 247 | 248 | (** Creates Cauchy-Lorentz distribution from parameters. *) 249 | val create : location:float -> scale:float -> t 250 | 251 | (** Cauchy-Lorentz distribution with 0 [location] and [scale] equal to 1. *) 252 | val standard : t 253 | end 254 | 255 | (** The beta distribution. 
*) 256 | module Beta : sig 257 | type t = { 258 | beta_alpha : float; 259 | beta_beta : float 260 | } 261 | 262 | include ContinuousDistribution with type t := t and type elt = float 263 | include Features.S with type t := t and type elt := float 264 | 265 | (** Creates beta distribution. Both shape parameters must be positive. *) 266 | val create : alpha:float -> beta:float -> t 267 | 268 | (** Creates beta distribution with parameters, estimated with method 269 | of moments. *) 270 | val mme : float array -> t 271 | end 272 | 273 | (** Logistic distribution. *) 274 | module Logistic : sig 275 | type t = { 276 | logistic_location : float; 277 | logistic_scale : float 278 | } 279 | 280 | include ContinuousDistribution with type t := t and type elt = float 281 | include Features.S with type t := t and type elt := float 282 | 283 | (** Creates logistic distribution. *) 284 | val create : location:float -> scale:float -> t 285 | 286 | (** Creates logistic distribution with parameters, estimated with method 287 | of moments. *) 288 | val mme : float array -> t 289 | end 290 | 291 | 292 | (** {2 Discrete distributions} *) 293 | 294 | (** The Poisson distribution. 295 | 296 | The probability distribution of a number of events occurring in a 297 | fixed interval if these events occur with a known average [rate], 298 | and occur independently from each other within that interval. *) 299 | module Poisson : sig 300 | type t = { poisson_rate : float } 301 | 302 | include DiscreteDistribution with type t := t and type elt = int 303 | include Features.S with type t := t and type elt := float 304 | 305 | (** Creates a Poisson distribution. [rate] must be positive. *) 306 | val create : rate:float -> t 307 | 308 | (** Creates a Poisson distribution with a MLE of parameters, estimated 309 | from given data. *) 310 | val mle : int array -> t 311 | end 312 | 313 | (** Bernoulli distribution. 
314 | 315 | The probability distribution, which takes value [1] with success 316 | probability [p] and value [0] with failure probability [1 - p]. *) 317 | module Bernoulli : sig 318 | type t = { bernoulli_p : float } 319 | 320 | include DiscreteDistribution with type t := t and type elt = int 321 | include Features.S with type t := t and type elt := float 322 | 323 | (** Creates Bernoulli distribution with given success probability [p]. *) 324 | val create : p:float -> t 325 | 326 | (** Creates a Bernoulli distribution with a MLE of parameters, estimated 327 | from given data. *) 328 | val mle : int array -> t 329 | end 330 | 331 | (** The binomial distribution. 332 | 333 | The probability distribution of the number of successes in a sequence 334 | of independent Bernoulli [trials]. *) 335 | module Binomial : sig 336 | type t = { 337 | binomial_trials : int; 338 | binomial_p : float 339 | } 340 | 341 | include DiscreteDistribution with type t := t and type elt = int 342 | include Features.S with type t := t and type elt := float 343 | 344 | (** Creates binomial distribution. Number of [trials] must be 345 | non-negative. *) 346 | val create : trials:int -> p:float -> t 347 | 348 | (** Creates binomial distribution with parameters, estimated with 349 | method of moments. *) 350 | val mme : int array -> t 351 | end 352 | 353 | (** The Geometric distribution. 354 | 355 | The probability distribution of the number of failures before the 356 | first success, supported on the set [[0, 1, ...]]. *) 357 | module Geometric : sig 358 | type t = { geometric_p : float } 359 | 360 | include DiscreteDistribution with type t := t and type elt = int 361 | include Features.S with type t := t and type elt := float 362 | 363 | (** Creates Geometric distribution with a given probability of success. *) 364 | val create : p:float -> t 365 | 366 | (** Creates Geometric distribution with parameters, estimated with 367 | method of moments. 
*)
368 |   val mme : int array -> t
369 | end
370 |
371 | (** The Hypergeometric distribution.
372 |
373 |     The probability distribution of obtaining [k] elements of "type 1"
374 |     in [t] samples from a population {b without} replacement, if the
375 |     population contains [m] elements of "type 1" and [t - m] elements
376 |     of "type 2". *)
377 | module Hypergeometric : sig
378 |   type t = {
379 |     hyper_m : int;
380 |     hyper_t : int;
381 |     hyper_k : int
382 |   }
383 |
384 |   include DiscreteDistribution with type t := t and type elt = int
385 |   include Features.S with type t := t and type elt := float
386 |
387 |   (** Creates Hypergeometric distribution. *)
388 |   val create : m:int -> t:int -> k:int -> t
389 | end
390 |
391 | (** Negative Binomial distribution.
392 |
393 |     The probability distribution of the number of successes in a sequence
394 |     of Bernoulli trials before a specified number of [failures] occurs. *)
395 | module NegativeBinomial : sig
396 |   type t = {
397 |     nbinomial_failures : float;
398 |     nbinomial_p : float
399 |   }
400 |
401 |   include DiscreteDistribution with type t := t and type elt = int
402 |   include Features.S with type t := t and type elt := float
403 |
404 |   (** Creates negative Binomial distribution with a given number of
405 |       failures and success probability. *)
406 |   val create : failures:float -> p:float -> t
407 |
408 |   (** Creates negative Binomial distribution with parameters, estimated
409 |       with method of moments. *)
410 |   val mme : int array -> t
411 | end
412 |
413 | module Categorical : sig
414 |   module type OrderedType = sig
415 |     type t
416 |
417 |     val compare : t -> t -> int
418 |   end
419 |
420 |   module type S = sig
421 |     include DiscreteDistribution
422 |
423 |     (** Creates a categorical distribution over values of type [elt],
424 |         where each value is given a probability, which defaults to [0]
425 |         for values not in the list.
*) 426 | val create : (elt * float) array -> t 427 | 428 | (** Creates a categorical distribution with a MLE of parameters, 429 | estimated from given data. *) 430 | val mle : elt array -> t 431 | end 432 | 433 | module Make 434 | : functor (Elt : Map.OrderedType) 435 | -> S with type elt = Elt.t 436 | end 437 | 438 | (** {2 Shortcuts for creating distributions} *) 439 | 440 | val normal : mean:float -> sd:float -> Normal.t 441 | val log_normal : mean:float -> sd:float -> LogNormal.t 442 | val uniform : lower:float -> upper:float -> Uniform.t 443 | val exponential : scale:float -> Exponential.t 444 | val chi_squared : df:int -> ChiSquared.t 445 | val f : df1:int -> df2:int -> F.t 446 | val t : df:float -> T.t 447 | val gamma : shape:float -> scale:float -> Gamma.t 448 | val cauchy : location:float -> scale:float -> Cauchy.t 449 | val beta : alpha:float -> beta:float -> Beta.t 450 | val logistic : location:float -> scale:float -> Logistic.t 451 | 452 | val poisson : rate:float -> Poisson.t 453 | val bernoulli : p:float -> Bernoulli.t 454 | val binomial : trials:int -> p:float -> Binomial.t 455 | val geometric : p:float -> Geometric.t 456 | val hypergeometric : m:int -> t:int -> k:int -> Hypergeometric.t 457 | val negative_binomial : failures:float -> p:float -> NegativeBinomial.t 458 | -------------------------------------------------------------------------------- /lib/internal.ml: -------------------------------------------------------------------------------- 1 | open StdLabels 2 | 3 | let default_rng = let open Gsl.Rng in 4 | env_setup (); 5 | make (default ()) 6 | 7 | let find_root_newton ~n_iter ~epsilon ~init gsl_fun = 8 | let open Gsl.Root.Polish in 9 | let solver = make NEWTON gsl_fun init in begin 10 | let counter = ref n_iter 11 | and r0 = ref nan 12 | and r1 = ref init in begin 13 | while !counter > 0 && 14 | (!r0 <> !r0 || abs_float (!r0 -. 
!r1) > epsilon) 15 | do 16 | iterate solver; 17 | decr counter; 18 | r0 := !r1; 19 | r1 := root solver 20 | done 21 | end; !r1 22 | end 23 | 24 | let sqr x = x *. x 25 | let cube x = x *. x *. x 26 | 27 | let is_nan (x : float) = x <> x 28 | let is_not_nan (x : float) = x = x 29 | 30 | let round x = int_of_float (floor (x +. 0.5)) 31 | 32 | 33 | module Array = struct 34 | include Array 35 | 36 | let sort_index ~cmp vs = 37 | let n = length vs in 38 | let order = Array.make n 0 in begin 39 | for i = 0 to n - 1 do 40 | Array.unsafe_set order i i 41 | done; 42 | 43 | sort ~cmp:(fun i j -> cmp (unsafe_get vs i) (unsafe_get vs j)) order; 44 | order 45 | end 46 | 47 | let count ~f = 48 | fold_left ~f:(fun acc v -> acc + if f v then 1 else 0) ~init:0 49 | 50 | let exists ~f vs = 51 | let rec loop i = 52 | if i < 0 53 | then false 54 | else f (unsafe_get vs i) || loop (pred i) 55 | in loop (length vs - 1) 56 | 57 | let for_all ~f vs = 58 | let rec loop i = 59 | if i < 0 60 | then true 61 | else f (unsafe_get vs i) && loop (pred i) 62 | in loop (length vs - 1) 63 | 64 | let partition ~f vs = 65 | let (l, r) = fold_left vs 66 | ~init:([], []) 67 | ~f:(fun (l, r) x -> if f x then (x :: l, r) else (l, x :: r)) 68 | in (Array.of_list l, Array.of_list r) 69 | end 70 | 71 | module Matrix_flat = struct 72 | include Gsl.Matrix_flat 73 | 74 | let exists m ~f = 75 | let (nrow, ncol) = dims m in 76 | let rec loop_columns i j = 77 | if j < 0 78 | then false 79 | else f (get m i j) || loop_columns i (pred j) 80 | and loop_rows i = 81 | if i < 0 82 | then false 83 | else loop_columns i (ncol - 1) || loop_rows (pred i) 84 | in loop_rows (nrow - 1) 85 | 86 | let map m ~f = 87 | let (nrow, ncol) = dims m in begin 88 | for i = 0 to nrow - 1 do 89 | for j = 0 to ncol - 1 do 90 | set m i j (f (get m i j)) 91 | done 92 | done 93 | end 94 | 95 | let row_sums m = 96 | let (nrow, ncol) = dims m in 97 | let res = create ~init:0. 
1 nrow in begin 98 | for i = 0 to nrow - 1 do 99 | for j = 0 to ncol - 1 do 100 | set res 0 i (get res 0 i +. get m i j) 101 | done 102 | done; res 103 | end 104 | and col_sums m = 105 | let (nrow, ncol) = dims m in 106 | let res = create ~init:0. 1 ncol in begin 107 | for j = 0 to ncol - 1 do 108 | for i = 0 to nrow - 1 do 109 | set res 0 j (get res 0 j +. get m i j) 110 | done 111 | done; res 112 | end 113 | 114 | let sum m = 115 | let (nrow, ncol) = dims m in 116 | let acc = ref 0. in begin 117 | for i = 0 to nrow - 1 do 118 | for j = 0 to ncol - 1 do 119 | acc := !acc +. get m i j 120 | done 121 | done; ! acc 122 | end 123 | 124 | let power m = 125 | let open Gsl.Blas_flat in 126 | let mul a b = 127 | assert (dims a = dims b); 128 | let (w, h) = dims a in 129 | let res = create ~init:0. w h in begin 130 | gemm ~ta:NoTrans ~tb:NoTrans ~alpha:1. ~beta:1. ~a ~b ~c:res; 131 | res 132 | end 133 | in 134 | 135 | let rec go m = function 136 | | 1 -> m 137 | | k -> 138 | let m2 = go m (k / 2) in 139 | let mm = mul m2 m2 in 140 | if k mod 2 = 0 141 | then mm 142 | else mul mm m 143 | in go m 144 | end 145 | -------------------------------------------------------------------------------- /lib/pareto.mldylib: -------------------------------------------------------------------------------- 1 | # OASIS_START 2 | # DO NOT EDIT (digest: b71122ac52aff447e1216d0df0528f81) 3 | Pareto 4 | # OASIS_STOP 5 | -------------------------------------------------------------------------------- /lib/pareto.mllib: -------------------------------------------------------------------------------- 1 | # OASIS_START 2 | # DO NOT EDIT (digest: b71122ac52aff447e1216d0df0528f81) 3 | Pareto 4 | # OASIS_STOP 5 | -------------------------------------------------------------------------------- /lib/pareto.mlpack: -------------------------------------------------------------------------------- 1 | # OASIS_START 2 | # DO NOT EDIT (digest: c9403007191710ce601dbcb0c2f6e00c) 3 | Base 4 | Distributions 5 | 
Resampling
6 | Sample
7 | Tests
8 | Internal
9 | # OASIS_STOP
10 |
--------------------------------------------------------------------------------
/lib/resampling.ml:
--------------------------------------------------------------------------------
1 | open Internal
2 |
3 |
4 | (* Leave-one-out resampling: applies [estimator] to each length (n - 1)
5 |    subsample of [vs]; returns [||] for empty input. *)
6 | let jackknife ~estimator vs =
7 |   let n = Array.length vs in
8 |   if n = 0
9 |   then [||]
10 |   else
11 |     let init = Array.unsafe_get vs 0 in
12 |     Array.init n ~f:(fun i ->
13 |       let holey = Array.make (n - 1) init in begin
14 |         Array.blit ~src:vs ~src_pos:0 ~dst:holey ~dst_pos:0 ~len:i;
15 |         Array.blit ~src:vs ~src_pos:(i + 1)
16 |           ~dst:holey ~dst_pos:i ~len:(n - i - 1);
17 |         estimator holey
18 |       end)
19 |
20 |
21 | (* Applies [estimator] to [n_iter] bootstrap (with-replacement) resamples
22 |    of [vs]. *)
23 | let resample ?rng ~estimator ~n_iter vs =
24 |   Array.init n_iter
25 |     ~f:(fun _i -> estimator (Base.sample ?rng ~replace:true vs))
26 |
27 |
28 | module Bootstrap = struct
29 |   type estimate = {
30 |     point : float;
31 |     lower_bound : float;
32 |     upper_bound : float;
33 |     confidence_level : float
34 |   }
35 |
36 |   (* Bias-corrected and accelerated bootstrap confidence interval:
37 |      bias correction comes from the fraction of bootstrap estimates
38 |      below the point estimate, acceleration from the jackknife
39 |      deviations.  Raises [Invalid_argument] unless
40 |      [confidence_level] is in (0, 1). *)
41 |   let bca ?rng ?(confidence_level=0.95) ~estimator ~n_iter vs =
42 |     if confidence_level <= 0. || confidence_level >= 1.
43 |     then invalid_arg "Bootstrap.bca: confidence level must be in range (0, 1)";
44 |
45 |     let point = estimator vs in
46 |     if Array.length vs = 1
47 |     then { point; lower_bound = point; upper_bound = point; confidence_level }
48 |     else
49 |       let rvs = resample ?rng ~estimator ~n_iter vs in
50 |       Array.sort ~cmp:compare rvs;  (* Sort, since we need percentiles. *)
51 |
52 |       let jack = jackknife ~estimator vs in
53 |       let jack_mean = Sample.mean jack in
54 |       (* The acceleration estimate needs the *signed* jackknife
55 |          deviations d_i = jack_mean -. jack_i:
56 |            a-hat = sum d_i^3 /. (6 *. (sum d_i^2)^1.5).
57 |          Squaring the deviations up front would lose the sign and turn
58 |          the cube sum into a sum of fourth powers. *)
59 |       let ds = Array.map jack ~f:(fun v -> jack_mean -. v) in
60 |       let sum_cubes =
61 |         Array.fold_left ds ~init:0. ~f:(fun acc d -> acc +. cube d)
62 |       and sum_squares =
63 |         Array.fold_left ds ~init:0. ~f:(fun acc d -> acc +. sqr d)
64 |       in
65 |       let accel = sum_cubes /. (6. *. (sum_squares ** 1.5)) in
66 |       let p = float_of_int (Array.count ~f:(fun v -> v < point) rvs) /.
67 |         float_of_int n_iter
68 |       in
69 |
70 |       let open Distributions.Normal in
71 |       let bias = quantile standard ~p
72 |       and z = quantile standard ~p:((1. -. confidence_level) /. 2.) in
73 |       let b1 = bias +. z
74 |       and b2 = bias -. z in
75 |       let a1 = bias +. b1 /. (1. -. accel *. b1)
76 |       and a2 = bias +. b2 /. (1. -. accel *. b2) in
77 |
78 |       (* Maps an adjusted z-score to an index into the sorted bootstrap
79 |          estimates; both indices are clamped to the valid range. *)
80 |       let pnorm x =
81 |         round (cumulative_probability standard ~x *. float_of_int n_iter)
82 |       in {
83 |         point;
84 |         lower_bound = rvs.(min (max (pnorm a1) 0) (n_iter - 1));
85 |         upper_bound = rvs.(max (min (pnorm a2) (n_iter - 1)) 0);
86 |         confidence_level
87 |       }
88 | end
89 |
--------------------------------------------------------------------------------
/lib/resampling.mli:
--------------------------------------------------------------------------------
1 | (** Resampling statistics. *)
2 |
3 | (** Repeatedly resamples a given data set with replacement, computing a
4 |     statistical estimate over the resampled data. *)
5 | val resample
6 |   : ?rng:Gsl.Rng.t
7 |   -> estimator:('a array -> 'b)
8 |   -> n_iter:int
9 |   -> 'a array
10 |   -> 'b array
11 |
12 | (** Repeatedly computes a statistical estimate over the data set, leaving
13 |     out a single observation at a time. *)
14 | val jackknife : estimator:('a array -> 'b) -> 'a array -> 'b array
15 |
16 |
17 | module Bootstrap : sig
18 |   type estimate = {
19 |     point : float;            (** Point estimate. *)
20 |     lower_bound : float;      (** Lower bound of the estimate confidence
21 |                                   interval *)
22 |     upper_bound : float;      (** Upper bound of the estimate confidence
23 |                                   interval *)
24 |     confidence_level : float  (** Confidence level corresponding to the
25 |                                   above intervals. *)
26 |   }
27 |
28 |   (** Bias-corrected and accelerated (BCa) bootstrap.
*) 29 | val bca 30 | : ?rng:Gsl.Rng.t 31 | -> ?confidence_level:float 32 | -> estimator:(float array -> float) 33 | -> n_iter:int 34 | -> float array 35 | -> estimate 36 | end 37 | -------------------------------------------------------------------------------- /lib/sample.ml: -------------------------------------------------------------------------------- 1 | open Internal 2 | 3 | module Stats = Gsl.Stats 4 | module Histo = Gsl.Histo 5 | 6 | let min = Stats.min 7 | and max = Stats.max 8 | and minmax = Stats.minmax 9 | let range vs = 10 | let (min, max) = minmax vs in 11 | max -. min 12 | 13 | let moments k vs = 14 | let n = Array.length vs in 15 | if n = 0 16 | then Array.make k nan 17 | else 18 | let ms = Array.make k 0. in begin 19 | for i = 0 to n - 1 do 20 | let v = Array.unsafe_get vs i in 21 | let acc = ref v in 22 | for p = 0 to k - 1 do 23 | Array.unsafe_set ms p 24 | (Array.unsafe_get ms p +. !acc); 25 | acc := !acc *. v 26 | done 27 | done; 28 | 29 | for p = 0 to k - 1 do 30 | Array.unsafe_set ms p (Array.unsafe_get ms p /. float_of_int n) 31 | done; ms 32 | end 33 | 34 | let mean vs = Stats.mean vs 35 | let variance ?mean vs = Stats.variance ?mean vs 36 | let sd ?mean vs = Stats.sd ?mean vs 37 | 38 | let skewness ?mean ?sd vs = match (mean, sd) with 39 | | (Some mean, Some sd) -> Stats.skew_m_sd ~mean ~sd vs 40 | | (Some mean, None) -> Stats.skew_m_sd ~mean ~sd:(Stats.sd ~mean vs) vs 41 | | (None, Some sd) -> Stats.skew_m_sd ~mean:(Stats.mean vs) ~sd vs 42 | | (None, None) -> Stats.skew vs 43 | 44 | let kurtosis ?mean ?sd vs = match (mean, sd) with 45 | | (Some mean, None) -> 46 | Stats.kurtosis_m_sd ~mean ~sd:(Stats.sd ~mean vs) vs 47 | | (None, Some sd) -> 48 | Stats.kurtosis_m_sd ~mean:(Stats.mean vs) ~sd vs 49 | | (Some mean, Some sd) -> Stats.kurtosis_m_sd ~mean ~sd vs 50 | | (None, None) -> Stats.kurtosis vs 51 | 52 | 53 | let _resolve_ties next d = function 54 | | `Average -> float_of_int next -. float_of_int d /. 2. 
55 | | `Min -> float_of_int (next - d) 56 | | `Max -> float_of_int next 57 | 58 | let _correct_ties ranks = 59 | let n = Array.length ranks in 60 | if n < 2 61 | then 1.0 62 | else 63 | let sorted = Array.copy ranks 64 | and t = ref 0 65 | and d = ref 0 66 | and i = ref 0 in begin 67 | Array.sort ~cmp:(fun r1 r2 -> 68 | compare (int_of_float r1) (int_of_float r2)) sorted; 69 | 70 | while !i < n - 1 do 71 | if sorted.(!i) = sorted.(!i + 1) 72 | then begin 73 | d := 1; 74 | while !i < n - 1 && sorted.(!i) = sorted.(!i + 1) do 75 | incr d; 76 | incr i; 77 | done; 78 | 79 | t := !t + (!d * !d * !d - !d) 80 | end; incr i 81 | done; float_of_int !t 82 | end 83 | 84 | let rank ?(ties_strategy=`Average) ?(cmp=compare) vs = 85 | let n = Array.length vs in 86 | let order = Array.sort_index ~cmp vs in 87 | let ranks = Array.make n 0. in 88 | let d = ref 0 in begin 89 | for i = 0 to n - 1 do 90 | if i == n - 1 || cmp vs.(order.(i)) vs.(order.(i + 1)) <> 0 91 | then 92 | let tie_rank = _resolve_ties (i + 1) !d ties_strategy in 93 | for j = i - !d to i do 94 | ranks.(order.(j)) <- tie_rank 95 | done; 96 | d := 0 97 | else 98 | incr d (* Found a duplicate! *) 99 | done; 100 | end; (_correct_ties ranks, ranks) 101 | 102 | 103 | let histogram ?(n_bins=10) ?range ?weights ?(density=false) vs = 104 | if n_bins <= 0 105 | then invalid_arg "Sample.histogram: n_bins must be a positive integer" 106 | else if vs = [||] then invalid_arg "Sample.histogram: no data" 107 | else begin 108 | match weights with 109 | | Some weights when Array.length weights <> Array.length vs -> 110 | invalid_arg "Sample.histogram: expected one weight per value" 111 | | _ -> () 112 | end; 113 | 114 | let h = Histo.make n_bins in begin 115 | match range with 116 | | None -> 117 | let (min, max) = minmax vs in 118 | let d = 119 | if n_bins = 1 120 | then 0. 121 | else (max -. min) /. float_of_int ((n_bins - 1) * 2) 122 | in Histo.set_ranges_uniform h ~xmin:(min -. d) ~xmax:(max +. 
d) 123 | | Some (min, max) when min < max -> 124 | Histo.set_ranges_uniform h ~xmin:min ~xmax:max 125 | | Some (_min, _max) -> 126 | invalid_arg ("Sample.histogram: max must be larger than min " ^ 127 | " in range paramter") 128 | end; 129 | 130 | for i = 0 to Array.length vs - 1 do 131 | let w = match weights with 132 | | None -> 1. 133 | | Some weights -> Array.unsafe_get weights i 134 | in Histo.accumulate h ~w (Array.unsafe_get vs i) 135 | done; 136 | 137 | let counts = Array.make n_bins 0. in 138 | let points = Array.make n_bins 0. in begin 139 | if density then Histo.scale h (1. /. Histo.sum h); 140 | for i = 0 to n_bins - 1 do 141 | Array.unsafe_set counts i (Histo.get h i); 142 | Array.unsafe_set points i (fst (Histo.get_range h i)) 143 | done; (points, counts) 144 | end 145 | 146 | 147 | module Quantile = struct 148 | type continuous_param = 149 | | CADPW 150 | | Hazen 151 | | SPSS 152 | | S 153 | | MedianUnbiased 154 | | NormalUnbiased 155 | 156 | let continuous_by ?(param=S) ~ps vs = 157 | if Array.exists ~f:(fun p -> p < 0. || p > 1.) ps 158 | then invalid_arg "Quantile.continuous_by: p must be in range [0, 1]"; 159 | if Array.exists ~f:is_nan vs 160 | then invalid_arg "Quantile.continuous_by: sample contains NaNs"; 161 | 162 | let (a, b) = match param with 163 | | CADPW -> (0., 1.) 164 | | Hazen -> (0.5, 0.5) 165 | | SPSS -> (0., 0.) 166 | | S -> (1., 1.) 167 | | MedianUnbiased -> (1. /. 3., 1. /. 3.) 168 | | NormalUnbiased -> (3. /. 8., 3. /. 8.) 169 | in 170 | 171 | let n = Array.length vs in 172 | let fuzz = epsilon_float *. 4. 173 | and js = Array.make n 0. 174 | and hs = Array.make n 0. in begin 175 | for i = 0 to Array.length ps - 1 do 176 | let p = Array.unsafe_get ps i in 177 | let nppm = a +. p *. (float_of_int n +. 1. -. a -. b) in 178 | let j = floor (nppm +. fuzz) in 179 | let h = if abs_float (nppm -. j) < fuzz 180 | then 0. 181 | else nppm -. 
j 182 | in begin 183 | Array.unsafe_set js i j; 184 | Array.unsafe_set hs i h; 185 | end 186 | done 187 | end; 188 | 189 | let bound ?(a=0) ~b i = Pervasives.min (Pervasives.max i a) b in 190 | let svs = Gsl.Gsl_sort.vector_flat_smallest 191 | (bound ~b:n (int_of_float (max js) + 1)) 192 | (Gsl.Vector_flat.of_array vs) in 193 | let item = fun i -> svs.(bound ~b:(n - 1) i) 194 | and qs = Array.make (Array.length ps) 0. in begin 195 | for i = 0 to Array.length ps - 1 do 196 | let j = int_of_float (Array.unsafe_get js i) in 197 | let h = Array.unsafe_get hs i in 198 | let q = (1. -. h) *. item (j - 1) +. h *. item j in 199 | Array.unsafe_set qs i q 200 | done; qs 201 | end 202 | 203 | let iqr ?param vs = 204 | match continuous_by ?param ~ps:[|0.25; 0.75|] vs with 205 | | [|q25; q75|] -> q75 -. q25 206 | | _ -> assert false (* Impossible. *) 207 | end 208 | 209 | let quantile ~ps vs = Quantile.continuous_by ~ps vs 210 | 211 | let iqr vs = Quantile.iqr vs 212 | 213 | 214 | module KDE = struct 215 | type bandwidth = 216 | | Silverman 217 | | Scott 218 | 219 | type kernel = 220 | | Gaussian 221 | 222 | let build_kernel = function 223 | | Gaussian -> 224 | fun h p v -> 225 | let u = (v -. p) /. h in 226 | let open Gsl.Math in 227 | (1. /. (sqrt2 *. sqrtpi)) *. exp (-. sqr u /. 2.) 228 | 229 | let build_points n_points h kernel vs = 230 | let (min, max) = minmax vs in 231 | let (a, b) = match kernel with 232 | | Gaussian -> (min -. 3. *. h, max +. 3. *. h) 233 | in 234 | 235 | let points = Array.make n_points 0. 236 | and step = (b -. a) /. float_of_int n_points in begin 237 | for i = 0 to n_points - 1 do 238 | Array.unsafe_set points i (a +. (float_of_int i) *. 
step) 239 | done; points 240 | end 241 | 242 | let estimate_pdf ?(kernel=Gaussian) ?(bandwidth=Scott) ?(n_points=512) vs = 243 | if Array.length vs < 2 244 | then invalid_arg "KDE.estimate_pdf: sample should have multiple elements"; 245 | 246 | let n = float_of_int (Array.length vs) in 247 | let s = Pervasives.min (sd vs) (iqr vs /. 1.34) in 248 | let h = match bandwidth with 249 | | Silverman -> 0.90 *. s *. (n ** -0.2) 250 | | Scott -> 1.06 *. s *. (n ** -0.2) 251 | in 252 | 253 | let points = build_points n_points h kernel vs in 254 | let k = build_kernel kernel in 255 | let f = 1. /. (h *. n) in 256 | let pdf = Array.make n_points 0. in begin 257 | for i = 0 to n_points - 1 do 258 | let p = Array.unsafe_get points i in 259 | Array.unsafe_set pdf i 260 | (f *. Array.fold_left ~f:(fun acc v -> acc +. k h p v) ~init:0. vs) 261 | done; (points, pdf) 262 | end 263 | end 264 | 265 | 266 | module Correlation = struct 267 | let pearson v1 v2 = 268 | let n = Array.length v1 in 269 | if Array.length v2 <> n 270 | then invalid_arg "Correlation.pearson: unequal length arrays"; 271 | 272 | let v1_mean = mean v1 273 | and v2_mean = mean v2 in 274 | let v12_sd = sd ~mean:v1_mean v1 *. sd ~mean:v2_mean v2 275 | and acc = ref 0. in 276 | for i = 0 to n - 1 do 277 | let v1 = Array.unsafe_get v1 i 278 | and v2 = Array.unsafe_get v2 i 279 | in acc := !acc +. (v1 -. v1_mean) *. (v2 -. v2_mean) 280 | done; !acc /. float_of_int (n - 1) /. v12_sd 281 | 282 | let spearman ?cmp v1 v2 = 283 | let n = Array.length v1 in 284 | if Array.length v2 <> n 285 | then invalid_arg "Correlation.spearman: unequal length arrays" 286 | else 287 | (* Note(superbobry): according to Wikipedia, ties strategy is 288 | fixed to [`Average]. 
*) 289 | let f vs = snd (rank ~ties_strategy:`Average ?cmp vs) in 290 | pearson (f v1) (f v2) 291 | 292 | module Auto = struct 293 | let pearson vs = 294 | let n = Array.length vs in 295 | if n < 2 296 | then [||] 297 | else 298 | let mean = Stats.mean vs in 299 | let acf shift = 300 | let acc = ref 0. in 301 | for i = 0 to n - shift - 1 do 302 | let v_i = Array.unsafe_get vs i 303 | and v_s = Array.unsafe_get vs (i + shift) in 304 | acc := !acc +. (v_s -. mean) *. (v_i -. mean) 305 | done; !acc /. float_of_int n 306 | in 307 | 308 | let ac = Array.init n ~f:acf in 309 | let ac0 = ac.(0) in begin 310 | for i = 0 to n - 1 do 311 | Array.unsafe_set ac i (Array.unsafe_get ac i /. ac0) 312 | done; ac 313 | end 314 | end 315 | end 316 | 317 | 318 | module Summary = struct 319 | type t = { 320 | k : int; 321 | m_1 : float; 322 | m_2 : float; 323 | m_3 : float; 324 | m_4 : float; 325 | max_k : float; 326 | min_k : float 327 | } 328 | 329 | let empty = { 330 | m_1 = 0.0; 331 | m_2 = 0.0; 332 | m_3 = 0.0; 333 | m_4 = 0.0; 334 | k = 0; 335 | max_k = min_float; 336 | min_k = max_float; 337 | } 338 | 339 | let combine t1 t2 = 340 | if t1.k = 0 341 | then t2 342 | else if t2.k = 0 343 | then t1 344 | else 345 | (* Note(superbobry): we can optimize it later, but right now I'd 346 | rather have these formulas match Wikipedia _directly_. *) 347 | let delta = t2.m_1 -. t1.m_1 348 | and t1k = float_of_int t1.k 349 | and t2k = float_of_int t2.k 350 | and tnk = float_of_int (t1.k + t2.k) 351 | in { 352 | m_1 = (t1k *. t1.m_1 +. t2k *. t2.m_1) /. tnk; 353 | m_2 = t1.m_2 +. t2.m_2 +. sqr delta *. t1k *. t2k /. tnk; 354 | m_3 = t1.m_3 +. t2.m_3 355 | +. cube delta *. t1k *. t2k *. (t1k -. t2k) /. sqr tnk 356 | +. 3. *. delta *. (t1k *. t2.m_2 -. t2k *. t1.m_2) /. tnk; 357 | m_4 = t1.m_4 +. t2.m_4 358 | +. ((delta ** 4.) *. t1k *. t2k *. 359 | (sqr t1k -. t1k *. t2k +. sqr t2k) /. cube tnk) 360 | +. (6. *. sqr delta *. 361 | (sqr t1k *. t2.m_2 +. sqr t2k *. t1.m_2) /. sqr tnk) 362 | +. 
(4. *. delta *. (t1k *. t2.m_3 -. t2k *. t1.m_3) /. tnk); 363 | k = t1.k + t2.k; 364 | max_k = Pervasives.max t1.max_k t2.max_k; 365 | min_k = Pervasives.min t1.min_k t2.min_k 366 | } 367 | 368 | let add t x_k = 369 | let n_k = float_of_int (succ t.k) in 370 | let delta = x_k -. t.m_1 in 371 | let delta_nk = delta /. n_k in 372 | let delta_m_2 = delta *. delta_nk *. float_of_int t.k in 373 | { 374 | m_1 = t.m_1 +. delta_nk; 375 | m_2 = t.m_2 +. delta_m_2; 376 | m_3 = t.m_3 +. 377 | delta_m_2 *. delta_nk *. (n_k -. 2.) -. 378 | (3. *. delta_nk *. t.m_2); 379 | m_4 = t.m_4 +. 380 | delta_m_2 *. sqr delta_nk *. (sqr n_k -. 3. *. n_k +. 3.) +. 381 | 6. *. sqr delta_nk *. t.m_2 -. 382 | 4. *. delta_nk *. t.m_3; 383 | k = succ t.k; 384 | min_k = Pervasives.min t.min_k x_k; 385 | max_k = Pervasives.max t.max_k x_k; 386 | } 387 | 388 | let size { k; _ } = k 389 | 390 | let min { min_k; _ } = if min_k = max_float then nan else min_k 391 | and max { max_k; _ } = if max_k = min_float then nan else max_k 392 | 393 | let mean t = if t.k > 0 then t.m_1 else nan 394 | 395 | let variance t = match t.k with 396 | (* Note(superbobry): we follow R and GSL here and treat variance 397 | of a single number undefined. *) 398 | | 0 | 1 -> nan 399 | | _ -> t.m_2 /. float_of_int (pred t.k) 400 | let sd t = sqrt (variance t) 401 | 402 | let skewness t = 403 | if t.k = 0 404 | then nan 405 | else sqrt (float_of_int t.k) *. t.m_3 /. (t.m_2 ** 1.5) 406 | and kurtosis t = 407 | if t.k = 0 408 | then nan 409 | else float_of_int t.k *. t.m_4 /. (t.m_2 *. t.m_2) -. 3. 410 | end 411 | -------------------------------------------------------------------------------- /lib/sample.mli: -------------------------------------------------------------------------------- 1 | (** Commonly used sample statistics. *) 2 | 3 | val min : float array -> float 4 | val max : float array -> float 5 | val minmax : float array -> (float * float) 6 | 7 | (** {e O(n)} Computes sample's range, i. e. 
the difference between the 8 | largest and smallest elements of a sample. *) 9 | val range : float array -> float 10 | 11 | (** {e O(n k)} Computes an array of sample moments of order 1 to k, i. e. 12 | [E{X^1}, E{X^2}, ..., E{X^k}]. *) 13 | val moments : int -> float array -> float array 14 | 15 | (** {e O(n)} Computes sample's arithmetic mean. *) 16 | val mean : float array -> float 17 | 18 | (** {e O(n)} Computes unbiased estimate of a sample's variance, also 19 | known as the {e sample variance}, where the denominator is [n - 1]. *) 20 | val variance : ?mean:float -> float array -> float 21 | 22 | (** {e O(n)} Computes sample's standard deviation. *) 23 | val sd : ?mean:float -> float array -> float 24 | 25 | (** {e O(n)} Computes the skewness of a sample, which is a measure of 26 | asymmetry of its distribution. *) 27 | val skewness : ?mean:float -> ?sd:float -> float array -> float 28 | 29 | (** {e O(n)} Computes the excess kurtosis of a sample, which is a 30 | measure of a "peakedness" of its distribution. *) 31 | val kurtosis : ?mean:float -> ?sd:float -> float array -> float 32 | 33 | 34 | (** {e O(n log n)} Computes sample's ranks, [ties_strategy] controls 35 | which ranks are assigned to equal values: 36 | 37 | - [`Average] the average of ranks should be assigned to each value. 38 | {b Default}. 39 | - [`Min] the minimum of ranks is assigned to each value. 40 | - [`Max] the maximum of ranks is assigned to each value. 41 | 42 | Returns a pair, where the first element is ties correction factor 43 | and second is an array of sample ranks. 44 | 45 | {b References} 46 | 47 | + P. R. Freeman, "Algorithm AS 26: Ranking an array of numbers", 48 | Vol. 19, Applied Statistics, pp111-113, 1970. *) 49 | val rank 50 | : ?ties_strategy:[`Average | `Min | `Max] 51 | -> ?cmp:('a -> 'a -> int) 52 | -> 'a array 53 | -> (float * float array) 54 | 55 | (** {e O(n)} Computes histogram of a data set. 
Bin sizes are uniform, 56 | based on a given [range], whic defaults to 57 | [(min - k, max + k)], where [k = (min - max) / (bins - 1) * 2]. 58 | This behaviour is copied from the excellent 59 | {{: http://github.com/bos/statistics} statistics} library by 60 | Brian O'Sullivan. *) 61 | val histogram 62 | : ?n_bins:int 63 | -> ?range:(float * float) 64 | -> ?weights:float array 65 | -> ?density:bool 66 | -> float array 67 | -> (float array * float array) 68 | 69 | 70 | module Quantile : sig 71 | (** Parameters for the continious sample method. *) 72 | type continuous_param = 73 | | CADPW (** Linear interpolation of the {e ECDF}. *) 74 | | Hazen (** Hazen's definition. *) 75 | | SPSS (** Definition used by the SPSS statistics application, 76 | also known as Weibull's definition. *) 77 | | S (** Definition used by the S statistics application.org 78 | Interpolation points divide the sample range into 79 | [n - 1] intervals. {b Default}. *) 80 | | MedianUnbiased (** Median unbiased definition. The resulting quantile 81 | estimates are approximately median unbiased 82 | regardless of the distribution of [vs] *) 83 | | NormalUnbiased (** Normal unbiased definition. An approximately unbiased 84 | estimate if the empirical distribution approximates 85 | the normal distribution. *) 86 | 87 | (** {e O(n log n)} Estimates sample quantile corresponding to the given 88 | probability [p], using the continuous sample method with given 89 | parameters. *) 90 | val continuous_by 91 | : ?param:continuous_param -> ps:float array -> float array -> float array 92 | 93 | (** {e O(n log n)} Estimates interquantile range of a given sample, 94 | using the continuous sample method with given parameters. *) 95 | val iqr : ?param:continuous_param -> float array -> float 96 | end 97 | 98 | (** {e O(n log n)} Estimates sample quantile corresponding to the given 99 | probability [p], using the continuous sample method with default 100 | parameters. 
*) 101 | val quantile : ps:float array -> float array -> float array 102 | 103 | (** {e O(n log n)} Estimates interquantile range of a given sample, 104 | using the continuous sample method with given parameters. *) 105 | val iqr : float array -> float 106 | 107 | 108 | module KDE : sig 109 | (** Bandwidth selection rules. *) 110 | type bandwidth = 111 | | Silverman (** Use {e rule-of-thumb} for choosing the bandwidth. 112 | It defaults to 113 | [0.9 * min(SD, IQR / 1.34) * n^-0.2]. *) 114 | | Scott (** Same as [Silverman], but with a factor, equal to 115 | [1.06]. *) 116 | 117 | type kernel = 118 | | Gaussian 119 | 120 | (** {e O(n * points)} Simple kernel density estimator. Returns an array 121 | of uniformly spaced points from the sample range at which the 122 | density function was estimated, and the estimates at these points. *) 123 | val estimate_pdf 124 | : ?kernel:kernel 125 | -> ?bandwidth:bandwidth 126 | -> ?n_points:int 127 | -> float array 128 | -> (float array * float array) 129 | 130 | (** {6 Example} 131 | 132 | {[ 133 | open Pareto 134 | let open Distributions.Normal in 135 | let vs = sample ~size:100 standard in 136 | let (points, pdf) = Sample.KDE.estimate_pdf ~points:10 vs in begin 137 | (* Output an ASCII density plot. *) 138 | Array.iteri (fun i d -> 139 | let count = int_of_float (d *. 20.) in 140 | printf "%9.5f " points.(i); 141 | for i = 0 to count do 142 | print_char (if i = count then '.' else ' '); 143 | done; 144 | 145 | print_newline (); 146 | ) pdf 147 | end 148 | ]} 149 | 150 | {6 References} 151 | 152 | + B.W. Silverman, "Density Estimation for Statistics and Data 153 | Analysis", Vol. 26, Monographs on Statistics and Applied 154 | Probability, Chapman and Hall, London, 1986. *) 155 | end 156 | 157 | 158 | module Correlation : sig 159 | (** {e O(n)} Computes Pearson product-moment correlation coefficient 160 | for two given samples. 
*) 161 | val pearson : float array -> float array -> float 162 | 163 | (** {e O(n log n)} Computes Spearman rank correlation coefficient for 164 | two given samples, which is essentially Pearson correlation 165 | calculated for sample ranks. *) 166 | val spearman : ?cmp:('a -> 'a -> int) -> 'a array -> 'a array -> float 167 | 168 | (** Autocorrelation, i. e. the correlation of the sample against a 169 | shifted version of itself. *) 170 | module Auto : sig 171 | (** {e O(n^2)} Computes autocorrelation, using Person product-moment 172 | correlation coefficient. *) 173 | val pearson : float array -> float array 174 | end 175 | end 176 | 177 | 178 | (** Calculates summary statistics over a possibly infinite stream of data. 179 | 180 | The algorithm runs in {e O(1)} space and {e O(n)} time. 181 | 182 | It is preferred for computing standard deviation, because roundoff 183 | errors in floating point operations might lead to taking a square 184 | root of a negative value. 185 | 186 | {6 References} 187 | 188 | + D. E. Knuth, "The Art of Computer Programming, Volume 2: 189 | Seminumerical Algorithms", 2nd edition, Section 4.2.2, p216. 190 | + T. B. Terriberry, "Computing Higher-Order Moments Online", 2008, 191 | http://people.xiph.org/~tterribe/notes/homs.html *) 192 | module Summary : sig 193 | type t 194 | 195 | (** Empty data set. *) 196 | val empty : t 197 | 198 | (** Combines statistics from two data sets. *) 199 | val combine : t -> t -> t 200 | 201 | (** Adds a value to the data set. *) 202 | val add : t -> float -> t 203 | 204 | (** Returns the maximum added value or [nan] if the data set is empty. *) 205 | val max : t -> float 206 | 207 | (** Returns the minimum added value or [nan] is the data set is empty. *) 208 | val min : t -> float 209 | 210 | (** Returns the number of available values. *) 211 | val size : t -> int 212 | 213 | (** Returns the arithmetic mean of the values that have been added 214 | or [nan] if the data set is empty. 
*) 215 | val mean : t -> float 216 | 217 | (** Returns the variance of the available values or [nan] if the 218 | data set is empty. *) 219 | val variance : t -> float 220 | 221 | (** Returns the standard deviation of the values that have been added 222 | or [nan] if the data set is empty. *) 223 | val sd : t -> float 224 | 225 | (** Returns the skewness of the values that have been added or [nan] if 226 | the data set is empty. 227 | 228 | {b Note}: for small sample sizes estimated value might be inaccurate, 229 | See issue #20. *) 230 | val skewness : t -> float 231 | 232 | (** Returns the excess kurtosis of the values that have been added 233 | or [nan] if the data set is empty. 234 | 235 | {b Note}: for small sample sizes estimated value might be inaccurate, 236 | See issue #20. *) 237 | val kurtosis : t -> float 238 | end 239 | -------------------------------------------------------------------------------- /lib/tests.ml: -------------------------------------------------------------------------------- 1 | open Internal 2 | 3 | type test_alternative = Less | Greater | TwoSided 4 | 5 | type test_result = { 6 | test_statistic : float; 7 | test_pvalue : float 8 | } 9 | 10 | let run_test ?(significance_level=0.05) f = 11 | let { test_pvalue = pvalue; _ } = f () in 12 | if pvalue <= significance_level 13 | then `NotSignificant 14 | else `Significant 15 | 16 | 17 | module T = struct 18 | let finalize d t alternative = 19 | let open Distributions.T in 20 | let pvalue = match alternative with 21 | | Less -> cumulative_probability d ~x:t 22 | | Greater -> 1. -. cumulative_probability d ~x:t 23 | | TwoSided -> 2. *. cumulative_probability d ~x:(-. (abs_float t)) 24 | in { test_statistic = t; test_pvalue = pvalue } 25 | 26 | let one_sample v ?(mean=0.) ?(alternative=TwoSided) () = 27 | let n = float_of_int (Array.length v) in 28 | (* Note(superbobry): R uses uncorrected sample variance, so the 29 | exact value of t-statistic might be slightly different. 
*) 30 | let t = (Sample.mean v -. mean) *. sqrt (n /. Sample.variance v) 31 | in finalize (Distributions.T.create ~df:(n -. 1.)) t alternative 32 | 33 | let two_sample_independent v1 v2 34 | ?(equal_variance=true) ?(mean=0.) ?(alternative=TwoSided) () = 35 | let n1 = float_of_int (Array.length v1) 36 | and n2 = float_of_int (Array.length v2) 37 | and (var1, var2) = (Sample.variance v1, Sample.variance v2) in 38 | 39 | let (df, denom) = if equal_variance 40 | then 41 | let df = n1 +. n2 -. 2. in 42 | let var12 = ((n1 -. 1.) *. var1 +. (n2 -. 1.) *. var2) /. df 43 | in (df, sqrt (var12 *. (1. /. n1 +. 1. /. n2))) 44 | else 45 | let vn1 = var1 /. n1 in 46 | let vn2 = var2 /. n2 in 47 | let df = 48 | sqr (vn1 +. vn2) /. (sqr vn1 /. (n1 -. 1.) +. sqr vn2 /. (n2 -. 1.)) 49 | in (df, sqrt (vn1 +. vn2)) 50 | in 51 | 52 | let t = (Sample.mean v1 -. Sample.mean v2 -. mean) /. denom in 53 | finalize (Distributions.T.create ~df) t alternative 54 | 55 | let two_sample_paired v1 v2 ?(mean=0.) ?(alternative=TwoSided) () = 56 | let n = Array.length v1 in 57 | if n <> Array.length v2 58 | then invalid_arg "T.two_sample_paired: unequal length arrays"; 59 | one_sample (Array.mapi ~f:(fun i x -> x -. v2.(i)) v1) ~mean ~alternative () 60 | end 61 | 62 | module ChiSquared = struct 63 | let finalize d chisq = 64 | let open Distributions.ChiSquared in 65 | { 66 | test_statistic = chisq; 67 | test_pvalue = 1. -. cumulative_probability ~x:chisq d 68 | } 69 | 70 | let goodness_of_fit observed ?expected ?(df=0) () = 71 | let n = Array.length observed in 72 | let expected = match expected with 73 | | Some expected -> 74 | if Array.length expected <> n 75 | then invalid_arg "ChiSquared.goodness_of_fit: unequal length arrays" 76 | else 77 | (* TODO(superbobry): make sure we have wellformed frequencies. *) 78 | expected 79 | | None -> 80 | Array.(make n (fold_left observed ~f:(+.) ~init:0. /. float_of_int n)) 81 | and chisq = ref 0. in begin 82 | for i = 0 to n - 1 do 83 | chisq := !chisq +. 
sqr (observed.(i) -. expected.(i)) /. expected.(i) 84 | done 85 | end; 86 | 87 | finalize (Distributions.ChiSquared.create ~df:(n - 1 - df)) !chisq 88 | 89 | let independence observed ?(correction=false) () = 90 | let observed = Matrix_flat.of_arrays observed in 91 | let (m, n) = Matrix_flat.dims observed in 92 | if m = 0 || n = 0 then invalid_arg "ChiSquared.independence: no data" 93 | else if Matrix_flat.exists ~f:(fun x -> x < 0.) observed 94 | then invalid_arg ("ChiSquared.independence: observed values must " ^ 95 | "be non negative"); 96 | 97 | let expected = Matrix_flat.create m n in 98 | let open Gsl.Blas_flat in 99 | gemm ~ta:Trans ~tb:NoTrans ~alpha:(1. /. Matrix_flat.sum observed) ~beta:1. 100 | ~a:(Matrix_flat.row_sums observed) 101 | ~b:(Matrix_flat.col_sums observed) 102 | ~c:expected; 103 | 104 | if Matrix_flat.exists ~f:((=) 0.) expected 105 | then invalid_arg ("ChiSquared.independence: computed expected " ^ 106 | " frequencies matrix has a zero element"); 107 | 108 | match (m - 1) * (n - 1) with 109 | | 0 -> 110 | (* This degenerate case is shamelessly ripped of from SciPy 111 | 'chi2_contingency' function. *) 112 | { test_statistic = 0.; test_pvalue = 1. } 113 | | df -> 114 | let chisq = 115 | let open Matrix_flat in 116 | let t = create m n in begin 117 | memcpy ~src:expected ~dst:t; 118 | sub t observed; 119 | if df = 1 && correction then begin 120 | map t ~f:abs_float; (* Use Yates' correction for continuity. *) 121 | add_constant t (-. 0.5) 122 | end; 123 | mul_elements t t; 124 | div_elements t expected; 125 | sum t 126 | end 127 | in finalize (Distributions.ChiSquared.create ~df) chisq 128 | end 129 | 130 | module KolmogorovSmirnov = struct 131 | let create_h n d = 132 | let k = floor (float_of_int n *. d) +. 1. in 133 | let h = k -. float_of_int n *. d in 134 | let size = 2 * int_of_float k - 1 in 135 | let m = Matrix_flat.create ~init:0. size size in begin 136 | (* Set all element in the lower triangle to 1. 
*) 137 | for i = 0 to size - 1 do 138 | for j = max 0 (i - 1) to size - 1 do 139 | Matrix_flat.set m (size - 1 - i) (size - 1 - j) 1. 140 | done 141 | done; 142 | 143 | (* Pre-calculate 'h' powers for the first column and bottom row. *) 144 | let h_powers = Array.make size 0. in 145 | Array.unsafe_set h_powers 0 h; 146 | for i = 1 to size - 1 do 147 | Array.unsafe_set h_powers i 148 | ((Array.unsafe_get h_powers (i - 1)) *. h) 149 | done; 150 | 151 | (* Correct first column and bottom row. *) 152 | for i = 0 to size - 1 do 153 | Matrix_flat.set m i 0 (Matrix_flat.get m i 0 -. Array.get h_powers i); 154 | Matrix_flat.set m (size - 1) i 155 | (Matrix_flat.get m (size - 1) i -. Array.get h_powers (size - 1 - i)) 156 | done; 157 | 158 | (* Correct bottom left element if needed. *) 159 | if 2. *. h > 1. 160 | then Matrix_flat.set m (size - 1) 0 161 | (Matrix_flat.get m (size - 1) 0 +. 162 | (2. *. h -. 1.) ** float_of_int size); 163 | 164 | (* Here come factorials! *) 165 | let facts = Array.make size 1 in 166 | for i = 1 to size - 1 do 167 | Array.unsafe_set facts i (Array.unsafe_get facts (i - 1) * (i + 1)) 168 | done; 169 | 170 | for i = 0 to size - 1 do 171 | for j = i + 1 to size - 1 do 172 | Matrix_flat.set m (size - 1 - i) (size - 1 - j) 173 | (Matrix_flat.get m (size - 1 - i) (size - 1 - j) /. 174 | float_of_int (Array.unsafe_get facts (j - i))) 175 | done 176 | done 177 | end; m 178 | 179 | let goodness_of_fit vs ~cumulative_probability:cp ?(alternative=TwoSided) () = 180 | let n = Array.length vs in 181 | if n < 1 then invalid_arg "KolmogorovSmirnov.goodness_of_fit: no data"; 182 | 183 | let is = Array.sort_index ~cmp:compare vs in 184 | let ds_plus = ref min_float 185 | and ds_minus = ref min_float in begin 186 | for i = 0 to n - 1 do 187 | let j = Array.unsafe_get is i in 188 | ds_minus := max !ds_minus 189 | (cp vs.(j) -. float_of_int i /. float_of_int n); 190 | ds_plus := max !ds_plus 191 | (float_of_int (i + 1) /. float_of_int n -. 
cp vs.(j)) 192 | done; 193 | end; 194 | 195 | let d = match alternative with 196 | | Less -> !ds_minus 197 | | Greater -> !ds_plus 198 | | TwoSided -> max !ds_minus !ds_plus 199 | in 200 | 201 | let pvalue = match alternative with 202 | | Less | Greater -> 203 | if d <= 0. 204 | then 0. 205 | else if d >= 1. 206 | then 1. 207 | else 208 | (* See Section 3 in Birnbaum and Tingey. *) 209 | let acc = ref 0. in begin 210 | for i = 0 to int_of_float (floor (float_of_int n *. (1. -. d))) do 211 | let j = float_of_int i in 212 | let s1 = (float_of_int n -. j) *. 213 | log (1. -. d -. j /. float_of_int n) 214 | and s2 = (j -. 1.) *. log (d +. j /. float_of_int n) 215 | in acc := !acc +. exp (Gsl.Sf.lnchoose n i +. s1 +. s2) 216 | done 217 | end; d *. !acc 218 | | TwoSided -> 219 | (* See code in Marsaglia et al. *) 220 | let s = float_of_int n *. d *. d in 221 | if s > 7.24 || (s > 3.76 && n > 99) 222 | then 223 | 1. -. 2. *. exp (-. (2.000071 +. 0.331 /. sqrt (float_of_int n) +. 224 | 1.409 /. float_of_int n) *. s) 225 | else 226 | (* FIXME(superbobry): control for overflow in matrix power 227 | calculation. *) 228 | let h = Matrix_flat.power (create_h n d) n in 229 | let k = int_of_float (ceil (float_of_int n *. d)) in 230 | let acc = ref (Matrix_flat.get h (k - 1) (k - 1)) in begin 231 | for i = 1 to n do 232 | acc := !acc *. float_of_int i /. float_of_int n 233 | done 234 | end; 1. -. !acc 235 | in { test_statistic = d; test_pvalue = pvalue } 236 | 237 | (* FIXME(superbobry): see the following bug for critique of 'psmirnov2x' 238 | implementation: 239 | https://bugs.r-project.org/bugzilla3/show_bug.cgi?id=13848 *) 240 | let psmirnov2x d n1 n2 = 241 | let m = min n1 n2 242 | and n = max n1 n2 in 243 | let md = float_of_int m 244 | and nd = float_of_int n in 245 | let q = (0.5 +. floor (d *. md *. nd -. 1e-7)) /. (md *. nd) 246 | and u = Array.make (n + 1) 0. in begin 247 | for i = 0 to n do 248 | Array.unsafe_set u i (if float_of_int i /. nd > q then 0. else 1.) 
249 | done; 250 | 251 | for i = 1 to m do 252 | let w = float_of_int i /. (float_of_int i +. nd) in begin 253 | Array.unsafe_set u 0 254 | (if float_of_int i /. md > q 255 | then 0. 256 | else w *. Array.unsafe_get u 0); 257 | 258 | for j = 1 to n do 259 | Array.unsafe_set u j 260 | (if abs_float (float_of_int i /. md -. float_of_int j /. nd) > q 261 | then 0. 262 | else w *. Array.unsafe_get u j +. Array.unsafe_get u (j - 1)) 263 | done 264 | end 265 | done; 266 | 267 | Array.unsafe_get u n 268 | end 269 | 270 | let two_sample v1 v2 ?(alternative=TwoSided) () = 271 | let n1 = Array.length v1 272 | and n2 = Array.length v2 in 273 | if n1 = 0 || n2 = 0 274 | then invalid_arg "KolmogorovSmirnov.two_sample: no data"; 275 | 276 | let vs = Array.append v1 v2 in 277 | let is = Array.sort_index ~cmp:compare vs in 278 | let has_ties = ref false in 279 | for i = 1 to n1 + n2 - 1 do 280 | let j = Array.unsafe_get is i in 281 | let k = Array.unsafe_get is (i - 1) in 282 | has_ties := !has_ties || Array.unsafe_get vs j = Array.unsafe_get vs k 283 | done; 284 | 285 | if !has_ties 286 | then invalid_arg "KolmogorovSmirnov.two_sample: ties are not allowed"; 287 | 288 | let is = Array.sort_index ~cmp:compare vs in 289 | let acc = ref 0. 290 | and z_max = ref neg_infinity 291 | and z_min = ref infinity 292 | and z_abs = ref neg_infinity in begin 293 | for i = 0 to n1 + n2 - 1 do 294 | let j = Array.unsafe_get is i in 295 | acc := 296 | if j < n1 297 | then !acc +. 1. /. float_of_int n1 298 | else !acc -. 1. /. float_of_int n2; 299 | z_max := max !z_max !acc; 300 | z_min := min !z_min !acc; 301 | z_abs := max !z_abs (abs_float !acc) 302 | done 303 | end; 304 | 305 | let d = match alternative with 306 | | Less -> -. !z_min 307 | | Greater -> !z_max 308 | | TwoSided -> !z_abs 309 | in 310 | 311 | let pvalue = match alternative with 312 | | Less | Greater -> 313 | (* Note(superbobry): there's also a X^2 approximation for 314 | one-sided hypothesis, see: 315 | L. 
Goodman, Kolmogorov-Smirnov test for psycological research. *) 316 | exp (-. 2. *. float_of_int (n1 * n2) /. float_of_int (n1 + n2) *. 317 | d *. d) 318 | | TwoSided -> 319 | (* Note(superbobry): we follow R here and treat two-sided P-value 320 | as a probability that a random assignment of the pooled 321 | data to two sets of the same sizes as the input data sets 322 | has a KS test statistic at least as great as that of the 323 | input data. *) 324 | 1. -. psmirnov2x d n1 n2 325 | in { test_statistic = d; test_pvalue = pvalue } 326 | end 327 | 328 | module MannWhitneyU = struct 329 | let two_sample_independent v1 v2 330 | ?(alternative=TwoSided) ?(correction=true) () = 331 | let n1 = float_of_int (Array.length v1) 332 | and n2 = float_of_int (Array.length v2) in 333 | if n1 = 0. || n2 = 0. 334 | then invalid_arg "MannWhitneyU.two_sample_independent: no data"; 335 | 336 | let n = n1 +. n2 in 337 | let (t, ranks) = Sample.rank (Array.append v1 v2) in 338 | let w1 = Array.(fold_left (sub ranks ~pos:0 ~len:(int_of_float n1)) 339 | ~f:(+.) ~init:0.) 340 | and w2 = Array.(fold_left (sub ranks ~pos:(int_of_float n1) 341 | ~len:(int_of_float n2)) 342 | ~f:(+.) ~init:0.) 343 | in 344 | 345 | let u1 = w1 -. n1 *. (n1 +. 1.) /. 2. in 346 | let u2 = w2 -. n2 *. (n2 +. 1.) /. 2. in 347 | let u = min u1 u2 in 348 | assert (u1 +. u2 = n1 *. n2); 349 | 350 | (* Lower bounds for normal approximation were taken from 351 | Gravetter, Frederick J., and Larry B. Wallnau. 352 | "Statistics for the behavioral sciences". Wadsworth Publishing 353 | Company, 2006. *) 354 | if t <> 0. || (n1 > 20. && n2 > 20.) 355 | then 356 | (* Normal approximation. *) 357 | let mean = n1 *. n2 /. 2. in 358 | let sd = sqrt ((n1 *. n2 /. 12.) *. 359 | ((n +. 1.) -. t /. (n *. (n -. 1.)))) in 360 | let delta = 361 | if correction 362 | then match alternative with 363 | | Less -> -. 0.5 364 | | Greater -> 0.5 365 | | TwoSided -> if u > mean then 0.5 else -. 0.5 366 | else 0. 
367 | in 368 | 369 | let z = (u -. mean -. delta) /. sd in 370 | let open Distributions.Normal in 371 | let pvalue = match alternative with 372 | | Less -> cumulative_probability standard ~x:z 373 | | Greater -> 1. -. cumulative_probability standard ~x:z 374 | | TwoSided -> 375 | 2. *. (min (cumulative_probability standard ~x:z) 376 | (1. -. cumulative_probability standard ~x:z)) 377 | in { test_statistic = u; test_pvalue = pvalue } 378 | else 379 | (* Exact critical value. *) 380 | let k = int_of_float (min n1 n2) in 381 | let c = Gsl.Combi.make (int_of_float n) k in 382 | let c_n_k = Gsl.Sf.choose (int_of_float n) k in 383 | let le = ref 0 in 384 | let ge = ref 0 in 385 | begin 386 | for _i = 0 to int_of_float c_n_k - 1 do 387 | let cu = Array.fold_left (Gsl.Combi.to_array c) ~init:0. 388 | ~f:(fun acc i -> acc +. Array.unsafe_get ranks i) -. 389 | float_of_int (k * (k + 1)) /. 2. 390 | in begin 391 | if cu <= u then incr le; 392 | if cu >= u then incr ge; 393 | Gsl.Combi.next c 394 | end 395 | done; 396 | 397 | let pvalue = match alternative with 398 | | Less -> float_of_int !le /. c_n_k 399 | | Greater -> float_of_int !ge /. c_n_k 400 | | TwoSided -> 2. *. float_of_int (min !le !ge) /. c_n_k 401 | in { test_statistic = u; test_pvalue = pvalue } 402 | end 403 | end 404 | 405 | module WilcoxonT = struct 406 | let two_sample_paired v1 v2 ?(alternative=TwoSided) ?(correction=true) () = 407 | let n = Array.length v1 in 408 | if n = 0 409 | then invalid_arg "WilcoxonT.two_sample_paired: no data"; 410 | if n <> Array.length v2 411 | then invalid_arg "WilcoxonT.two_sample_paired: unequal length arrays"; 412 | 413 | let d = Array.init n ~f:(fun i -> v2.(i) -. v1.(i)) in 414 | let (zeros, non_zeros) = Array.partition ~f:((=) 0.) d in 415 | let nz = float_of_int (Array.length non_zeros) in 416 | let (t, ranks) = Sample.rank non_zeros 417 | ~cmp:(fun d1 d2 -> compare (abs_float d1) (abs_float d2)) in 418 | let w_plus = 419 | Array.fold_left ~f:(+.) ~init:0. 
420 | (Array.mapi non_zeros 421 | ~f:(fun i v -> if v > 0. then ranks.(i) else 0.)) in 422 | let w_minus = nz *. (nz +. 1.) /. 2. -. w_plus in 423 | 424 | (* Following Sheskin, W is computed as a minimum of W+ and W-. *) 425 | let w = min w_plus w_minus in 426 | 427 | if t <> 0. || Array.length zeros <> 0 || n > 20 428 | then 429 | (* Normal approximation. *) 430 | let mean = nz *. (nz +. 1.) /. 4. in 431 | let sd = sqrt (nz *. (nz +. 1.) *. (2. *. nz +. 1.) /. 24. -. 432 | t /. 48.) in 433 | let delta = 434 | if correction 435 | then match alternative with 436 | | Less -> -0.5 437 | | Greater -> 0.5 438 | | TwoSided -> if w > mean then 0.5 else -0.5 439 | else 0. 440 | in 441 | 442 | let z = (w -. mean -. delta) /. sd in 443 | let open Distributions.Normal in 444 | let pvalue = match alternative with 445 | | Less -> cumulative_probability standard ~x:z 446 | | Greater -> 1. -. cumulative_probability standard ~x:z 447 | | TwoSided -> 448 | 2. *. (min (cumulative_probability standard ~x:z) 449 | (1. -. cumulative_probability standard ~x:z)) 450 | in { test_statistic = w; test_pvalue = pvalue } 451 | else 452 | (* Exact critical value. *) 453 | let le = ref 0 in 454 | let ge = ref 0 in 455 | let two_n = float_of_int (2 lsl (int_of_float nz)) in 456 | begin 457 | for i = 0 to int_of_float two_n - 1 do 458 | let pw = ref 0. in 459 | for j = 0 to int_of_float nz - 1 do 460 | if (i lsr j) land 1 = 1 461 | then pw := !pw +. Array.unsafe_get ranks j; 462 | done; 463 | 464 | if !pw <= w then incr le; 465 | if !pw >= w then incr ge; 466 | done; 467 | 468 | let pvalue = match alternative with 469 | | Less -> float_of_int !le /. two_n 470 | | Greater -> float_of_int !ge /. two_n 471 | | TwoSided -> 2. *. float_of_int (min !le !ge) /. two_n 472 | in { test_statistic = w; test_pvalue = pvalue } 473 | end 474 | 475 | let one_sample vs ?(shift=0.) 
= 476 | two_sample_paired (Array.make (Array.length vs) shift) vs 477 | end 478 | 479 | module Sign = struct 480 | let two_sample_paired v1 v2 ?(alternative=TwoSided) () = 481 | let n = Array.length v1 in 482 | if n = 0 483 | then invalid_arg "Sign.two_sample_paired: no data"; 484 | if n <> Array.length v2 485 | then invalid_arg "Sign.two_sample_paired: unequal length arrays"; 486 | 487 | let ds = Array.init n ~f:(fun i -> v1.(i) -. v2.(i)) in 488 | let (pi_plus, pi_minus) = Array.fold_left ds ~init:(0, 0) 489 | ~f:(fun (p, m) d -> 490 | if d > 0. 491 | then (succ p, m) 492 | else if d < 0. then (p, succ m) 493 | else (p, m)) 494 | in 495 | 496 | let open Distributions.Binomial in 497 | let d = create ~trials:(pi_plus + pi_minus) ~p:0.5 in 498 | let pvalue = match alternative with 499 | | Less -> cumulative_probability d ~k:pi_plus 500 | | Greater -> 1. -. cumulative_probability d ~k:(pi_plus - 1) 501 | | TwoSided -> 502 | 2. *. (min (cumulative_probability d ~k:pi_plus) 503 | (1. -. cumulative_probability d ~k:(pi_plus - 1))) 504 | in { test_statistic = float_of_int pi_plus; test_pvalue = min 1. pvalue } 505 | 506 | let one_sample vs ?(shift=0.) = 507 | two_sample_paired vs (Array.make (Array.length vs) shift) 508 | end 509 | 510 | 511 | module Multiple = struct 512 | type adjustment_method = 513 | | HolmBonferroni 514 | | BenjaminiHochberg 515 | 516 | let adjust pvalues how = 517 | let m = Array.length pvalues in 518 | let adjusted_pvalues = Array.make m 0. in 519 | begin match how with 520 | | HolmBonferroni -> 521 | let is = Array.sort_index ~cmp:compare pvalues in 522 | let iu = Array.sort_index ~cmp:compare is in begin 523 | for i = 0 to m - 1 do 524 | let j = Array.unsafe_get is i in 525 | Array.unsafe_set adjusted_pvalues i 526 | (min 1. (float_of_int (m - i) *. 
pvalues.(j))) 527 | done; 528 | 529 | let cm = Base.cumulative ~f:max adjusted_pvalues in 530 | Base.reorder iu ~src:cm ~dst:adjusted_pvalues 531 | end 532 | | BenjaminiHochberg -> 533 | let is = Array.sort_index ~cmp:(fun v1 v2 -> compare v2 v1) pvalues in 534 | let iu = Array.sort_index ~cmp:compare is in begin 535 | for i = 0 to m - 1 do 536 | let j = Array.unsafe_get is i in 537 | Array.unsafe_set adjusted_pvalues i 538 | (min 1. (float_of_int m /. float_of_int (m - i) *. pvalues.(j))) 539 | done; 540 | 541 | let cm = Base.cumulative ~f:min adjusted_pvalues in 542 | Base.reorder iu ~src:cm ~dst:adjusted_pvalues 543 | end 544 | end; adjusted_pvalues 545 | end 546 | -------------------------------------------------------------------------------- /lib/tests.mli: -------------------------------------------------------------------------------- 1 | (** Statistical testing. *) 2 | 3 | type test_alternative = Less | Greater | TwoSided 4 | 5 | type test_result = { 6 | test_statistic : float; 7 | test_pvalue : float 8 | } 9 | 10 | (** Assess significance of the statistical test at a given 11 | [significance_level], which defaults to [0.05]. *) 12 | val run_test 13 | : ?significance_level:float 14 | -> (unit -> test_result) 15 | -> [`Significant | `NotSignificant] 16 | 17 | 18 | module T : sig 19 | (** One sample Student's t-test, which evaluates the null hypothesis 20 | that a [mean] of a normally distributed variable is equal to the 21 | specified value. *) 22 | val one_sample 23 | : float array 24 | -> ?mean:float 25 | -> ?alternative:test_alternative 26 | -> unit 27 | -> test_result 28 | 29 | (** Two sample t-test, which evaluates the null hypothesis that the 30 | difference of means of two {e independent} normally distributed 31 | populations is equal to the specified value. 
*) 32 | val two_sample_independent 33 | : float array 34 | -> float array 35 | -> ?equal_variance:bool 36 | -> ?mean:float 37 | -> ?alternative:test_alternative 38 | -> unit 39 | -> test_result 40 | 41 | (** Paired two sample t-test, which evaluates the null hypothes that 42 | the difference of means of the two {e paired} normally distributed 43 | populations is equal to the specified value. *) 44 | val two_sample_paired 45 | : float array 46 | -> float array 47 | -> ?mean:float 48 | -> ?alternative:test_alternative 49 | -> unit 50 | -> test_result 51 | end 52 | 53 | (** Pearson's chi-squared test. *) 54 | module ChiSquared : sig 55 | val goodness_of_fit 56 | : float array -> ?expected:float array -> ?df:int -> unit -> test_result 57 | 58 | val independence 59 | : float array array -> ?correction:bool -> unit -> test_result 60 | end 61 | 62 | module KolmogorovSmirnov : sig 63 | (** One-sample Kolmogorov-Smirnov test for goodness of fit, which 64 | evaluates the distribution [G(x)] of the observed random variable 65 | against a given distribution [F(x)]. Under the null hypothesis 66 | the two distributions are identical, [G(x) = F(x)]. *) 67 | val goodness_of_fit 68 | : float array 69 | -> cumulative_probability:(float -> float) 70 | -> ?alternative:test_alternative 71 | -> unit 72 | -> test_result 73 | 74 | (** Two-sample Kolmogorov-Smirnov test, which evaluates the null 75 | hypothesis, that two {e independent} samples are drawn from the 76 | same continious distribution. 77 | 78 | {b Note}: in the current implementation samples with ties will 79 | result in an [Invalid_argument] exception. *) 80 | val two_sample 81 | : float array 82 | -> float array 83 | -> ?alternative:test_alternative 84 | -> unit 85 | -> test_result 86 | 87 | (** {6 References} 88 | 89 | + National Institute of Standards and Technology (US), et al. 90 | "Engineering statistics handbook", Section 1.3.5.16. 91 | The Institute, 2001. 92 | + Jingbo Wang, Wai Wan Tsang, and George Marsaglia. 
93 | "Evaluating Kolmogorov's distribution." Journal of 94 | Statistical Software 8, no. 18. 2003. 95 | + Z. W. Birnbaum, Fred H. Tingey. "One-sided confidence contours 96 | for probability distribution functions." The Annals of 97 | Mathematical Statistics, pp592-596. 1951. *) 98 | end 99 | 100 | module MannWhitneyU : sig 101 | (** Mann-Whitney U test (also known as Mann-Whitney-Wilcoxon test and 102 | Wilcoxon rank sum test) is a non-paramteric test, which evaluates 103 | the null hypothesis that two {e independent} samples have equal 104 | medians. *) 105 | val two_sample_independent 106 | : 'a array 107 | -> 'a array 108 | -> ?alternative:test_alternative 109 | -> ?correction:bool 110 | -> unit 111 | -> test_result 112 | 113 | (** {6 References} 114 | 115 | + Gravetter, Frederick J. and Larry B. Wallnau. 116 | "Statistics for the behavioral sciences". Wadsworth Publishing 117 | Company, 2006. 118 | + David J. Sheskin. "Handbook of Parametric and Nonparametric 119 | Statistical Procedures", 3rd edition. CRC Press, 2003. *) 120 | end 121 | 122 | module WilcoxonT : sig 123 | (** Wilcoxon signed-rank test, which evaluates the null hypothesis 124 | that sample median is equal to the specified [shift]. 125 | 126 | Test assumptions: 127 | 128 | + Sample under test was randomly selected from the population it 129 | represents. 130 | + All [vs -. shift] differences are iid and come from a continious 131 | population. *) 132 | val one_sample 133 | : float array 134 | -> ?shift:float 135 | -> ?alternative:test_alternative 136 | -> ?correction:bool 137 | -> unit 138 | -> test_result 139 | 140 | (** Wilcoxon paired signed-rank test, which evaluates the null hypothesis 141 | that two {e related} samples have equal medians. 142 | 143 | Test assumptions: 144 | 145 | + Samples under test were randomly selected from the population 146 | they represent. 147 | + Observation differences [vs2 -. vs1] are iid and come from a 148 | continious population. 
*) 149 | val two_sample_paired 150 | : float array 151 | -> float array 152 | -> ?alternative:test_alternative 153 | -> ?correction:bool 154 | -> unit 155 | -> test_result 156 | 157 | (** {6 References} 158 | 159 | + David J. Sheskin. "Handbook of Parametric and Nonparametric 160 | Statistical Procedures", 3rd edition. CRC Press, 2003. 161 | + http://www.fon.hum.uva.nl/Service/Statistics/Signed_Rank_Algorihms.html *) 162 | end 163 | 164 | module Sign : sig 165 | (** Sign test, which evaluates the null hypothesis that sample median is 166 | equal to the specified [shift]. 167 | 168 | Test assumptions: 169 | 170 | + Sample under test was randomly selected from the population it 171 | represents. *) 172 | val one_sample 173 | : float array 174 | -> ?shift:float 175 | -> ?alternative:test_alternative 176 | -> unit 177 | -> test_result 178 | 179 | (** Dependent samples sign test, which evaluates the null hypothesis 180 | that the median difference between observations from two {e related} 181 | samples is zero. 182 | 183 | Test assumptions: 184 | 185 | + Samples under test were randomly selected from the population they 186 | represent. *) 187 | val two_sample_paired 188 | : float array 189 | -> float array 190 | -> ?alternative:test_alternative 191 | -> unit 192 | -> test_result 193 | end 194 | 195 | 196 | (** Adjustments for multiple comparisons. *) 197 | module Multiple : sig 198 | type adjustment_method = 199 | | HolmBonferroni 200 | | BenjaminiHochberg 201 | 202 | (** Adjusts obtained P-values for multiple comparisons using a given 203 | adjustment method. *) 204 | val adjust : float array -> adjustment_method -> float array 205 | 206 | (** {6 References} 207 | 208 | + Yoav Benjamini and Yosef Hochberg. "Controlling the false discovery 209 | rate: a practical and powerful approach to multiple testing.", 210 | Journal of the Royal Statistical Society, Series B (Methodological), 211 | pp289-300, 1995. 
*) 212 | end 213 | -------------------------------------------------------------------------------- /lib_test/common.ml: -------------------------------------------------------------------------------- 1 | open OUnit 2 | 3 | let assert_almost_equal ?(epsilon=1e-6) ?msg x y = 4 | (* Note(superbobry): original version can't handle infinities properly. 5 | See #1381 in oUnit bug tracker. *) 6 | if not (x = infinity || y = infinity || 7 | x = neg_infinity || y = neg_infinity) 8 | then assert_equal 9 | ?msg 10 | ~cmp:(cmp_float ~epsilon) 11 | ~printer:(Printf.sprintf "%.10f") x y 12 | 13 | let cmp_array ~cmp v1 v2 = 14 | let n = min (Array.length v1) (Array.length v2) 15 | and res = ref true in 16 | for i = 0 to n - 1 do 17 | res := !res && cmp (Array.unsafe_get v1 i) (Array.unsafe_get v2 i) 18 | done; !res 19 | 20 | let printer_array ~printer vs = 21 | let inner = String.concat ", " Array.(to_list (map printer vs)) in 22 | Printf.sprintf "[%s]" inner 23 | -------------------------------------------------------------------------------- /lib_test/distributions_test.ml: -------------------------------------------------------------------------------- 1 | open OUnit 2 | 3 | open Pareto.Distributions 4 | 5 | open Common 6 | 7 | 8 | let test_log_density () = 9 | let go ~msg 10 | (type t) 11 | (module D : ContinuousDistribution with type t = t and type elt = float) 12 | (d : t) = 13 | for i = 0 to 100 do 14 | let x = Uniform.(random (create ~lower:0. ~upper:42.)) in 15 | assert_almost_equal ~msg (log (D.density d ~x)) (D.log_density d ~x) 16 | done 17 | in begin 18 | go "Normal" (module Normal) (normal ~mean:4. ~sd:2.); 19 | go "LogNormal" (module LogNormal) (log_normal ~mean:4. ~sd:2.); 20 | go "Uniform" (module Uniform) (uniform ~lower:2. 
~upper:4.); 21 | go "Exponential" (module Exponential) (exponential ~scale:42.); 22 | go "ChiSquared" (module ChiSquared) (chi_squared ~df:42); 23 | go "F" (module F) (f ~df1:4 ~df2:2); 24 | go "T" (module T) (t ~df:42.); 25 | go "Gamma" (module Gamma) (gamma ~shape:4. ~scale:2.); 26 | go "Cauchy" (module Cauchy) (cauchy ~location:4. ~scale:2.); 27 | go "Beta" (module Beta) (beta ~alpha:4. ~beta:2.); 28 | go "Logistic" (module Logistic) (logistic ~location:4. ~scale:2.) 29 | end 30 | 31 | and test_log_probability () = 32 | let go msg 33 | (type t) 34 | (module D : DiscreteDistribution with type t = t and type elt = int) 35 | (d : t) = 36 | for i = 0 to 100 do 37 | let k = int_of_float (Uniform.(random (create ~lower:0. ~upper:42.))) in 38 | assert_almost_equal ~msg:msg 39 | (log (D.probability d ~k)) (D.log_probability d ~k) 40 | done 41 | in begin 42 | go "Poisson" (module Poisson) (poisson ~rate:42.); 43 | go "Bernoulli" (module Bernoulli) (bernoulli ~p:0.42); 44 | go "Binomial" (module Binomial) (binomial ~trials:42 ~p:0.42); 45 | go "Geometric" (module Geometric) (geometric ~p:0.42); 46 | go "Hypergeometric" (module Hypergeometric) 47 | (hypergeometric ~m:4 ~t:42 ~k:2); 48 | go "NegativeBinomial" (module NegativeBinomial) 49 | (negative_binomial ~failures:42. 
~p:0.42); 50 | end 51 | 52 | 53 | let test_categorical () = 54 | let module Strings = Categorical.Make(String) in 55 | let d = 56 | Strings.create [|("C", 0.3); ("A", 0.1); ("G", 0.5); ("T", 0.1)|] 57 | in begin 58 | assert_almost_equal ~msg:"Pr(X = A)" 0.1 (Strings.probability d "A"); 59 | assert_almost_equal ~msg:"Pr(X = C)" 0.3 (Strings.probability d "C"); 60 | assert_almost_equal ~msg:"Pr(X = G)" 0.5 (Strings.probability d "G"); 61 | assert_almost_equal ~msg:"Pr(X = T)" 0.1 (Strings.probability d "T"); 62 | 63 | assert_almost_equal ~msg:"Pr(X <= A)" 64 | (Strings.cumulative_probability d "A") 0.1; 65 | assert_almost_equal ~msg:"Pr(X <= T)" 66 | (Strings.cumulative_probability d "T") 1.0; 67 | 68 | assert_almost_equal ~msg:"Pr(X <= $)" 69 | (Strings.cumulative_probability d "$") 0. 70 | end 71 | 72 | 73 | let test_gamma_mle () = 74 | let shape = Uniform.(random (create ~lower:0. ~upper:42.)) 75 | and scale = Uniform.(random (create ~lower:0. ~upper:42.)) in 76 | let open Gamma in 77 | let vs = sample ~size:(1 lsl 16) (create ~shape ~scale) in 78 | let { gamma_shape; gamma_scale } = mle ~n_iter:100 ~epsilon:1e-6 vs in 79 | begin 80 | assert_almost_equal ~msg:"shape" ~epsilon:0.01 shape gamma_shape; 81 | assert_almost_equal ~msg:"scale" ~epsilon:0.01 scale gamma_scale 82 | end 83 | 84 | 85 | let test = "Distributions" >::: [ 86 | "categorical" >:: test_categorical; 87 | "density vs. log_density" >:: test_log_density; 88 | "probability vs. log_probability" >:: test_log_probability; 89 | 90 | "Gamma MLE" >:: test_gamma_mle; 91 | ] 92 | -------------------------------------------------------------------------------- /lib_test/sample_test.ml: -------------------------------------------------------------------------------- 1 | open OUnit 2 | 3 | open Pareto.Sample 4 | 5 | open Common 6 | 7 | 8 | let test_summary ~size () = 9 | let vs = 10 | let open Pareto.Distributions.Uniform in 11 | sample ~size (create ~lower:(-42.) ~upper:42.) 
12 | in 13 | 14 | let s = Array.fold_left Summary.add Summary.empty vs in begin 15 | assert_almost_equal ~msg:"min" (Summary.min s) (min vs); 16 | assert_almost_equal ~msg:"max" (Summary.max s) (max vs); 17 | assert_equal ~msg:"size" (Summary.size s) (Array.length vs); 18 | assert_almost_equal ~msg:"mean" (Summary.mean s) (mean vs); 19 | assert_almost_equal ~msg:"variance" (Summary.variance s) (variance vs); 20 | assert_almost_equal ~msg:"sd" (Summary.sd s) (sd vs) 21 | end 22 | 23 | and test_combined_summary ~size () = 24 | let vs = 25 | let open Pareto.Distributions.Uniform in 26 | sample ~size (create ~lower:(-42.) ~upper:42.) 27 | in 28 | 29 | let mid = size / 2 in 30 | let v1 = Array.sub vs 0 mid 31 | and v2 = Array.sub vs mid (size - mid) in 32 | let s1 = Array.fold_left Summary.add Summary.empty v1 33 | and s2 = Array.fold_left Summary.add Summary.empty v2 in 34 | let s12 = Summary.combine s1 s2 35 | and s = Array.fold_left Summary.add Summary.empty vs in begin 36 | assert_almost_equal ~msg:"min" (Summary.min s) (Summary.min s12); 37 | assert_almost_equal ~msg:"max" (Summary.max s) (Summary.max s12); 38 | assert_equal ~msg:"size" (Summary.size s) (Summary.size s12); 39 | assert_almost_equal ~msg:"mean" (Summary.mean s) (Summary.mean s12); 40 | assert_almost_equal ~msg:"variance" 41 | (Summary.variance s) (Summary.variance s12); 42 | assert_almost_equal ~msg:"sd" (Summary.sd s) (Summary.sd s12); 43 | assert_almost_equal ~msg:"skewness" 44 | (Summary.skewness s) (Summary.skewness s12); 45 | assert_almost_equal ~msg:"kurtosis" 46 | (Summary.kurtosis s) (Summary.kurtosis s12); 47 | end 48 | 49 | and test_quantile () = 50 | let rec vs = 51 | [|0.952363286988083; 0.829666168783014; 0.563616484350936; 52 | 0.386334933107061; 0.0833841367636058; 0.99997428768617; 53 | 0.374802467851785; 0.349201461890657; 0.89384498325876; 54 | 0.431750792813907|] 55 | and go ~param ~msg expected = 56 | assert_equal 57 | ~msg ~cmp:(cmp_array ~cmp:cmp_float) 58 | 
~printer:(printer_array ~printer:(Printf.sprintf "%.6f")) 59 | (Quantile.continuous_by ~param ~ps:[|0.; 0.25; 0.5; 0.75; 1.|] vs) 60 | expected 61 | in begin 62 | go ~param:Quantile.CADPW ~msg:"type 4" [| 63 | 0.0833841367636058; 0.362001964871221; 0.431750792813907; 64 | 0.861755576020887; 0.99997428768617 65 | |]; 66 | go ~param:Quantile.Hazen ~msg:"type 5" [| 67 | 0.0833841367636058; 0.374802467851785; 0.497683638582421; 68 | 0.89384498325876; 0.99997428768617 69 | |]; 70 | go ~param:Quantile.SPSS ~msg:"type 6" [| 71 | 0.0833841367636058; 0.368402216361503; 0.497683638582421; 72 | 0.908474559191091; 0.99997428768617 73 | |]; 74 | go ~param:Quantile.S ~msg:"type 7" [| 75 | 0.0833841367636058; 0.377685584165604; 0.497683638582421; 76 | 0.877800279639823; 0.99997428768617 77 | |]; 78 | go ~param:Quantile.MedianUnbiased ~msg:"type 8" [| 79 | 0.0833841367636058; 0.372669050688358; 0.497683638582421; 80 | 0.898721508569537; 0.99997428768617 81 | |]; 82 | go ~param:Quantile.NormalUnbiased ~msg:"type 9" [| 83 | 0.0833841367636058; 0.373202404979215; 0.497683638582421; 84 | 0.897502377241843; 0.99997428768617 85 | |] 86 | end 87 | 88 | and test_rank () = 89 | let vs = [|4.; 19.; 11.; 18.; 2.; 12.; 13.; 16.; 0.; 2.; 90 | 2.; 7.; 1.; 17.; 16.; 19.; 11.; 12.; 19.; 4.|] 91 | in begin 92 | assert_equal 93 | ~msg:"average" ~cmp:(cmp_array ~cmp:cmp_float) 94 | ~printer:(printer_array ~printer:(Printf.sprintf "%.6f")) 95 | (snd (rank ~ties_strategy:`Average vs)) 96 | [|6.5; 19.; 9.5; 17.; 4.; 11.5; 13.; 14.5; 1.; 4.; 4.; 97 | 8.; 2.; 16.; 14.5; 19.; 9.5; 11.5; 19.; 6.5|]; 98 | assert_equal 99 | ~msg:"min" ~cmp:(cmp_array ~cmp:cmp_float) 100 | ~printer:(printer_array ~printer:(Printf.sprintf "%.6f")) 101 | (snd (rank ~ties_strategy:`Min vs)) 102 | [|6.; 18.; 9.; 17.; 3.; 11.; 13.; 14.; 1.; 3.; 3.; 8.; 103 | 2.; 16.; 14.; 18.; 9.; 11.; 18.; 6.|]; 104 | assert_equal 105 | ~msg:"max" ~cmp:(cmp_array ~cmp:cmp_float) 106 | ~printer:(printer_array ~printer:(Printf.sprintf "%.6f")) 107 
| (snd (rank ~ties_strategy:`Max vs)) 108 | [|7.; 20.; 10.; 17.; 5.; 12.; 13.; 15.; 1.; 5.; 5.; 8.; 109 | 2.; 16.; 15.; 20.; 10.; 12.; 20.; 7.|] 110 | end 111 | 112 | and test_correlation () = 113 | let v1 = [|0.0172824052698839; -0.454504077892448; 1.25946495815052; 114 | 1.34046889912174; 0.609915320636686; -0.217651830293509; 115 | 0.991287334206914; -0.186392670591343; 0.0266357683474309; 116 | -1.45310338401619|] 117 | and v2 = [|0.505340435184033; -1.42575172974959; -0.521196733941402; 118 | -0.0185022933706756; 0.17230654109602; 1.67872553102743; 119 | 0.480586104798118; 0.910431368258919; 0.373583673677502; 120 | -0.3494655448979|] 121 | in begin 122 | assert_almost_equal ~msg:"Pearson product-moment correlation" 123 | (Correlation.pearson v1 v2) 0.02894452; 124 | assert_almost_equal ~msg:"Spearman rank correlation" 125 | (Correlation.spearman v1 v2) (-0.07878788); 126 | 127 | assert_equal 128 | ~msg:"Pearson product-moment self-correlation" 129 | ~cmp:(cmp_array ~cmp:cmp_float) 130 | ~printer:(printer_array ~printer:(Printf.sprintf "%.6f")) 131 | (Correlation.Auto.pearson v1) 132 | [|1.; 0.0975335049156232; 0.00331984882534066; -0.20666268433441; 133 | 0.179634622194576; -0.255208927944468; -0.289850963695003; 134 | -0.233617717921346; 0.162004926236094; 0.0428473917235931|] 135 | end 136 | 137 | and test_moments () = 138 | let vs = 139 | let open Pareto.Distributions.Uniform in 140 | sample ~size:1024 (create ~lower:(-42.) ~upper:42.) 141 | in 142 | 143 | let k = 8 in 144 | let ms = Pareto.Sample.moments k vs in 145 | for p = 1 to k do 146 | assert_almost_equal ~msg:(string_of_int p) 147 | (Array.fold_left (+.) 0. 148 | (Array.map (fun v -> v ** float_of_int p) vs) /. 1024.) 
149 | ms.(p - 1) 150 | done 151 | 152 | 153 | let test = "Sample" >::: [ 154 | "summary statistics, n = 100" >:: test_summary ~size:100; 155 | "summary statistics, n = 1000" >:: test_summary ~size:1000; 156 | "summary statistics, n = 10000" >:: test_summary ~size:10000; 157 | "combined summary, n = 100" >:: test_combined_summary ~size:100; 158 | "combined summary, n = 1000" >:: test_combined_summary ~size:1000; 159 | "combined summary, n = 10000" >:: test_combined_summary ~size:10000; 160 | "quantile" >:: test_quantile; 161 | "rank" >:: test_rank; 162 | "correlation" >:: test_correlation; 163 | "moments" >:: test_moments 164 | ] 165 | -------------------------------------------------------------------------------- /lib_test/test.ml: -------------------------------------------------------------------------------- 1 | open OUnit 2 | 3 | let all () = TestList [ 4 | Tests_test.test; 5 | Sample_test.test; 6 | Distributions_test.test 7 | ] 8 | -------------------------------------------------------------------------------- /lib_test/test_runner.ml: -------------------------------------------------------------------------------- 1 | let () = 2 | let open OUnit in 3 | ignore (run_test_tt_main (Test.all ())) 4 | -------------------------------------------------------------------------------- /lib_test/tests_test.ml: -------------------------------------------------------------------------------- 1 | open OUnit 2 | 3 | open Pareto.Tests 4 | 5 | open Common 6 | 7 | 8 | let assert_equal_test_result ~msg tr1 tr2 = 9 | assert_almost_equal ~msg tr1.test_statistic tr2.test_statistic; 10 | assert_almost_equal ~msg tr1.test_pvalue tr2.test_pvalue 11 | 12 | let assert_equal_test_results ?(msg="") 13 | (f : ?alternative:test_alternative -> unit -> test_result) 14 | expected = 15 | List.iter2 (fun tr1 alternative -> 16 | let tr2 = f ~alternative () in 17 | let direction = match alternative with 18 | | TwoSided -> "two-sided" 19 | | Less -> "less" 20 | | Greater -> "greater" 21 | in 
assert_equal_test_result 22 | ~msg:(if msg = "" 23 | then direction 24 | else Printf.sprintf "%s, %s" msg direction) 25 | tr1 tr2 26 | ) expected [TwoSided; Less; Greater] 27 | 28 | 29 | let t_test_one_sample () = 30 | let vs = [|0.88456; 0.43590; 0.95778; -1.05039; -0.38589; 31 | -0.06342; -0.18712; 1.58856; 0.86964; 1.22192|] 32 | in begin 33 | assert_equal_test_results (T.one_sample vs ~mean:0.) [ 34 | { test_statistic = 1.6368118; test_pvalue = 0.1360967 }; 35 | { test_statistic = 1.6368118; test_pvalue = 0.931951 }; 36 | { test_statistic = 1.6368118; test_pvalue = 0.0680483 } 37 | ]; 38 | 39 | (* Zero division issues. *) 40 | let tr = 41 | T.one_sample [|0.; 0.; 0.; 0.|] ~mean:0. ~alternative:TwoSided () 42 | in begin 43 | assert_bool "t-statistic or t-test P-value is not nan" 44 | (tr.test_statistic <> tr.test_statistic && 45 | tr.test_pvalue <> tr.test_pvalue) 46 | end 47 | end 48 | 49 | and t_test_two_sample_independent () = 50 | let v1 = [|-0.86349; 0.36688; -0.48266; 0.53237; -0.87635; 51 | -1.28357; -1.46325; 0.21937; -0.38159; -0.22752|] 52 | and v2 = [|-0.20951; 1.27388; 0.27331; 1.85599; -1.09702; 53 | -0.20033; -0.45065; 0.06710; -0.18932; 1.60007|] 54 | in begin 55 | assert_equal_test_results ~msg:"unequal variance" 56 | (T.two_sample_independent v1 v2 ~mean:0.42 ~equal_variance:false) 57 | [{ test_statistic = -3.097208; test_pvalue = 0.00683211 }; 58 | { test_statistic = -3.097208; test_pvalue = 0.003416056 }; 59 | { test_statistic = -3.097208; test_pvalue = 0.996583 }]; 60 | assert_equal_test_results ~msg:"equal variance" 61 | (T.two_sample_independent v1 v2 ~mean:0.24 ~equal_variance:true) 62 | [{ test_statistic = -2.615915; test_pvalue = 0.01750332 }; 63 | { test_statistic = -2.615915; test_pvalue = 0.00875166 }; 64 | { test_statistic = -2.615915; test_pvalue = 0.991248 }] 65 | end 66 | 67 | and t_test_two_sample_paired () = 68 | let v1 = [|-0.86349; 0.36688; -0.48266; 0.53237; -0.87635; 69 | -1.28357; -1.46325; 0.21937; -0.38159; -0.22752|] 70 
| and v2 = [|-0.20951; 1.27388; 0.27331; 1.85599; -1.09702; 71 | -0.20033; -0.45065; 0.06710; -0.18932; 1.60007|] 72 | in begin 73 | assert_equal_test_results 74 | (T.two_sample_paired v1 v2 ~mean:0.) 75 | [{ test_statistic = -3.607401; test_pvalue = 0.0056823 }; 76 | { test_statistic = -3.607401; test_pvalue = 0.00284115 }; 77 | { test_statistic = -3.607401; test_pvalue = 0.997158 }]; 78 | end 79 | 80 | and chisq_test_gof () = 81 | let observed = [|42.; 24.; 10.; 10.|] 82 | and expected = [|21.; 22.; 17.; 26.|] 83 | in begin 84 | assert_equal_test_result ~msg:"with uniform probabilities" 85 | (ChiSquared.goodness_of_fit observed ()) 86 | { test_statistic = 32.139534; test_pvalue = 4.8908077607843233e-07 }; 87 | 88 | (* Note(superbobry): R doesn't have a version of the test with 89 | frequences, thus we use 'scipy.stats.chisquare' as a reference. *) 90 | assert_equal_test_result ~msg:"with given probabilities" 91 | (ChiSquared.goodness_of_fit observed ~expected ()) 92 | { test_statistic = 33.910324; test_pvalue = 2.06945476533e-07 } 93 | end 94 | 95 | and chisq_test_independence () = 96 | let observed = [| 97 | [|4.; 3.; 5.; 3.; 5.; 3.; 2.; 5.; 4.; 4.; 4.; 3.|]; 98 | [|2.; 2.; 1.; 2.; 3.; 1.; 2.; 3.; 2.; 1.; 1.; 3.|]; 99 | [|2.; 4.; 3.; 3.; 4.; 3.; 3.; 4.; 4.; 1.; 2.; 1.|]; 100 | [|3.; 5.; 4.; 3.; 4.; 4.; 3.; 3.; 3.; 4.; 4.; 4.|] 101 | |] in begin 102 | assert_equal_test_result ~msg:"with continuity correction" 103 | (ChiSquared.independence observed ~correction:true ()) 104 | { test_statistic = 9.153073; test_pvalue = 0.999987 }; 105 | end 106 | 107 | and ks_test_gof () = 108 | let vs = [|-1.52455; 0.79745; 0.76526; -2.32246; 0.15411; 109 | -1.36430; 0.62041; 1.17614; 1.09825; -0.17400|] 110 | in begin 111 | let open Pareto.Distributions.Normal in 112 | assert_equal_test_results ~msg:"standard normal" 113 | (KolmogorovSmirnov.goodness_of_fit vs 114 | ~cumulative_probability:(fun x -> cumulative_probability standard ~x)) 115 | [{ test_statistic = 0.232506; 
test_pvalue = 0.575175 }; 116 | { test_statistic = 0.232506; test_pvalue = 0.293906 }; 117 | { test_statistic = 0.2137634; test_pvalue = 0.3516124 }] 118 | end 119 | 120 | and ks_test_two_sample () = 121 | let v1 = [|0.60074; 1.93516; 0.62419; -0.40251; -0.14719; 122 | -0.05324; -0.95052; 1.84247; -0.58041; -0.75201|] 123 | and v2 = [|-0.33866; -0.59032; -0.12525; -0.81013; -0.60733; 124 | 0.18550; 1.01396; 0.17067; -0.74872; 1.03694|] 125 | in begin 126 | assert_equal_test_results 127 | (KolmogorovSmirnov.two_sample v1 v2) 128 | [{ test_statistic = 0.2; test_pvalue = 0.994457 }; 129 | { test_statistic = 0.2; test_pvalue = 0.670320 }; 130 | { test_statistic = 0.1; test_pvalue = 0.904837 }] 131 | end 132 | 133 | and mann_whitney_test_two_sample () = 134 | (* Source: 'scipy/stats/tests/test_stats.py'. *) 135 | let v1 = [|1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 136 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 137 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 138 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 139 | 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 140 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 141 | 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 142 | 1.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 2.; 1.; 1.; 143 | 2.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 144 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 145 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 146 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 147 | 2.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 148 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 3.; 1.; 1.; 149 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 150 | 1.; 1.; 1.; 1.; 1.; 1.; 1.|] 151 | and v2 = [|1.; 1.; 1.; 1.; 1.; 1.; 1.; 2.; 1.; 2.; 1.; 1.; 1.; 152 | 1.; 2.; 1.; 1.; 1.; 2.; 
1.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 3.; 1.; 153 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 2.; 1.; 2.; 1.; 1.; 1.; 1.; 154 | 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 155 | 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 2.; 2.; 1.; 1.; 2.; 1.; 1.; 2.; 156 | 1.; 2.; 1.; 1.; 1.; 1.; 2.; 2.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 157 | 1.; 1.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 2.; 2.; 2.; 1.; 158 | 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 1.; 159 | 1.; 2.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 1.; 160 | 1.; 1.; 1.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 2.; 1.; 1.; 1.; 1.; 1.; 161 | 1.|] 162 | in begin 163 | assert_equal_test_results ~msg:"with continuity correction and ties" 164 | (MannWhitneyU.two_sample_independent v1 v2 ~correction:true) 165 | [{ test_statistic = 16980.5; test_pvalue = 0.0000564287 }; 166 | { test_statistic = 16980.5; test_pvalue = 0.0000282143 }; 167 | { test_statistic = 16980.5; test_pvalue = 0.999972 }]; 168 | 169 | assert_equal_test_results ~msg:"with continuity correction and no ties" 170 | (MannWhitneyU.two_sample_independent 171 | [|11; 4; 2; 5|] [|12; 3; 9; 7|] ~correction:true) 172 | [{ test_statistic = 5.; test_pvalue = 0.4857143 }; 173 | { test_statistic = 5.; test_pvalue = 0.2428571 }; 174 | { test_statistic = 5.; test_pvalue = 0.8285714 }] 175 | end 176 | 177 | and wilcoxon_signed_rank_test_one_sample () = 178 | begin 179 | (* Source: Sheskin, Example 6.1. 180 | 181 | Note(superbobry): we don't cross-check statistic value with R 182 | here, because R computes W = W-, while our implementation 183 | follows Sheskin and defines W = min(W-, W+). I guess that's 184 | why one-tailed P-values are flipped. *) 185 | assert_equal_test_results ~msg:"with continuity correction and ties" 186 | (WilcoxonT.one_sample [|9.; 10.; 8.; 4.; 8.; 3.; 0.; 10.; 15.; 9.|] 187 | ~shift:5. 
~correction:true) 188 | [{ test_statistic = 11.; test_pvalue = 0.101575647 }; 189 | { test_statistic = 11.; test_pvalue = 0.0507878 }; 190 | { test_statistic = 11.; test_pvalue = 0.959035 }]; 191 | 192 | assert_equal_test_results ~msg:"with continuity correction and no ties" 193 | (WilcoxonT.one_sample [|9.; 10.; 8.; 4.; 3.; 15.|] 194 | ~shift:5. ~correction:true) 195 | [{ test_statistic = 3.; test_pvalue = 0.15625 }; 196 | { test_statistic = 3.; test_pvalue = 0.078125 }; 197 | { test_statistic = 3.; test_pvalue = 0.953125 }] 198 | end 199 | 200 | and wilcoxon_signed_rank_test_two_samples () = 201 | let v1 = [|78.; 24.; 64.; 45.; 64.; 52.; 30.; 50.; 64.; 50.; 202 | 78.; 22.; 84.; 40.; 90.; 72.|] 203 | and v2 = [|78.; 24.; 62.; 48.; 68.; 56.; 25.; 44.; 56.; 40.; 204 | 68.; 36.; 68.; 20.; 58.; 32.|] 205 | in begin 206 | assert_equal_test_results ~msg:"with continuity correction and ties" 207 | (WilcoxonT.two_sample_paired v1 v2 ~correction:true) 208 | [{ test_statistic = 19.; test_pvalue = 0.0382053143 }; 209 | { test_statistic = 19.; test_pvalue = 0.01910266 }; 210 | { test_statistic = 19.; test_pvalue = 0.983638 }] 211 | end 212 | 213 | and sign_test_one_sample () = 214 | (* Source: Sheskin, Example 9.7. *) 215 | let vs = 216 | [|230.; 167.; 250.; 345.; 442.; 190.; 200.; 248.; 289.; 262.; 301.|] 217 | in begin 218 | assert_equal_test_results 219 | (Sign.one_sample vs ~shift:200.) 220 | [{ test_statistic = 8.; test_pvalue = 0.109375 }; 221 | { test_statistic = 8.; test_pvalue = 0.9892578 }; 222 | { test_statistic = 8.; test_pvalue = 0.0546875 }] 223 | end 224 | 225 | and sign_test_two_sample () = 226 | (* Source: Sheskin, Example 19.1. 
*) 227 | let v1 = [|9.; 2.; 1.; 4.; 6.; 4.; 7.; 8.; 5.; 1.|] 228 | and v2 = [|8.; 2.; 3.; 2.; 3.; 0.; 4.; 5.; 4.; 0.|] 229 | in begin 230 | assert_equal_test_results 231 | (Sign.two_sample_paired v1 v2) 232 | [{ test_statistic = 8.; test_pvalue = 0.0390625 }; 233 | { test_statistic = 8.; test_pvalue = 0.9980468 }; 234 | { test_statistic = 8.; test_pvalue = 0.01953125 }] 235 | end 236 | 237 | 238 | let test_hb_adjust () = 239 | let pvalues = 240 | [|0.000962882346117542; 0.00189844480724466; 0.0183097438104205; 241 | 0.0315318359604176; 0.0481693657349631; 0.105687877464594; 242 | 0.543211136961355; 0.565056666152251; 0.603476808731503; 243 | 0.955690764788587|] 244 | and adjusted_pvalues = 245 | [|0.00962882346117542; 0.0170860032652019; 0.146477950483364; 246 | 0.220722851722923; 0.289016194409779; 0.528439387322972; 247 | 1.; 1.; 1.; 1.|] 248 | in begin 249 | assert_equal 250 | ~cmp:(cmp_array ~cmp:(cmp_float ~epsilon:1e-6)) 251 | ~printer:(printer_array ~printer:(Printf.sprintf "%.6f")) 252 | adjusted_pvalues 253 | Multiple.(adjust pvalues HolmBonferroni) 254 | end 255 | 256 | and test_bh_adjust () = 257 | let pvalues = 258 | [|0.000962882346117542; 0.00189844480724466; 0.0183097438104205; 259 | 0.0315318359604176; 0.0481693657349631; 0.105687877464594; 260 | 0.543211136961355; 0.565056666152251; 0.603476808731503; 261 | 0.955690764788587|] 262 | and adjusted_pvalues = 263 | [|0.00949222403622329; 0.00949222403622329; 0.0610324793680683; 264 | 0.078829589901044; 0.0963387314699263; 0.176146462440991; 265 | 0.670529787479448; 0.670529787479448; 0.670529787479448; 266 | 0.955690764788587|] 267 | in begin 268 | assert_equal 269 | ~cmp:(cmp_array ~cmp:(cmp_float ~epsilon:1e-6)) 270 | ~printer:(printer_array ~printer:(Printf.sprintf "%.6f")) 271 | adjusted_pvalues 272 | Multiple.(adjust pvalues BenjaminiHochberg) 273 | end 274 | 275 | 276 | let test = "Tests" >::: [ 277 | "one-sample t-test" >:: t_test_one_sample; 278 | "two-sample t-test for independent samples" >:: 
279 | t_test_two_sample_independent; 280 | "two-sample t-test for paired samples" >:: t_test_two_sample_paired; 281 | "X^2 test for goodness of fit" >:: chisq_test_gof; 282 | "X^2 test for independence" >:: chisq_test_independence; 283 | "one-sample Kolmogorov-Smirnov test for goodness of fit" >:: ks_test_gof; 284 | "two-sample Kolmogorov-Smirnov test" >:: ks_test_two_sample; 285 | "two-sample Mann-Whitney test for independent samples" >:: 286 | mann_whitney_test_two_sample; 287 | "one-sample Wilcoxon signed-rank test" >:: 288 | wilcoxon_signed_rank_test_one_sample; 289 | "two-sample Wilcoxon signed-rank test for paired samples" >:: 290 | wilcoxon_signed_rank_test_two_samples; 291 | "one-sample sign test" >:: sign_test_one_sample; 292 | "two-sample sign test" >:: sign_test_two_sample; 293 | 294 | "Holm-Bonferroni P-value adjustment" >:: test_hb_adjust; 295 | "Benjamini-Hochberg P-value adjustment" >:: test_bh_adjust 296 | ] 297 | -------------------------------------------------------------------------------- /myocamlbuild.ml: -------------------------------------------------------------------------------- 1 | (* OASIS_START *) 2 | (* DO NOT EDIT (digest: ee450cf06c8efebd990f1826664f8a22) *) 3 | module OASISGettext = struct 4 | (* # 22 "src/oasis/OASISGettext.ml" *) 5 | 6 | 7 | let ns_ str = 8 | str 9 | 10 | 11 | let s_ str = 12 | str 13 | 14 | 15 | let f_ (str: ('a, 'b, 'c, 'd) format4) = 16 | str 17 | 18 | 19 | let fn_ fmt1 fmt2 n = 20 | if n = 1 then 21 | fmt1^^"" 22 | else 23 | fmt2^^"" 24 | 25 | 26 | let init = 27 | [] 28 | 29 | 30 | end 31 | 32 | module OASISExpr = struct 33 | (* # 22 "src/oasis/OASISExpr.ml" *) 34 | 35 | 36 | 37 | 38 | 39 | open OASISGettext 40 | 41 | 42 | type test = string 43 | 44 | 45 | type flag = string 46 | 47 | 48 | type t = 49 | | EBool of bool 50 | | ENot of t 51 | | EAnd of t * t 52 | | EOr of t * t 53 | | EFlag of flag 54 | | ETest of test * string 55 | 56 | 57 | 58 | type 'a choices = (t * 'a) list 59 | 60 | 61 | let eval var_get 
t = 62 | let rec eval' = 63 | function 64 | | EBool b -> 65 | b 66 | 67 | | ENot e -> 68 | not (eval' e) 69 | 70 | | EAnd (e1, e2) -> 71 | (eval' e1) && (eval' e2) 72 | 73 | | EOr (e1, e2) -> 74 | (eval' e1) || (eval' e2) 75 | 76 | | EFlag nm -> 77 | let v = 78 | var_get nm 79 | in 80 | assert(v = "true" || v = "false"); 81 | (v = "true") 82 | 83 | | ETest (nm, vl) -> 84 | let v = 85 | var_get nm 86 | in 87 | (v = vl) 88 | in 89 | eval' t 90 | 91 | 92 | let choose ?printer ?name var_get lst = 93 | let rec choose_aux = 94 | function 95 | | (cond, vl) :: tl -> 96 | if eval var_get cond then 97 | vl 98 | else 99 | choose_aux tl 100 | | [] -> 101 | let str_lst = 102 | if lst = [] then 103 | s_ "" 104 | else 105 | String.concat 106 | (s_ ", ") 107 | (List.map 108 | (fun (cond, vl) -> 109 | match printer with 110 | | Some p -> p vl 111 | | None -> s_ "") 112 | lst) 113 | in 114 | match name with 115 | | Some nm -> 116 | failwith 117 | (Printf.sprintf 118 | (f_ "No result for the choice list '%s': %s") 119 | nm str_lst) 120 | | None -> 121 | failwith 122 | (Printf.sprintf 123 | (f_ "No result for a choice list: %s") 124 | str_lst) 125 | in 126 | choose_aux (List.rev lst) 127 | 128 | 129 | end 130 | 131 | 132 | # 132 "myocamlbuild.ml" 133 | module BaseEnvLight = struct 134 | (* # 22 "src/base/BaseEnvLight.ml" *) 135 | 136 | 137 | module MapString = Map.Make(String) 138 | 139 | 140 | type t = string MapString.t 141 | 142 | 143 | let default_filename = 144 | Filename.concat 145 | (Sys.getcwd ()) 146 | "setup.data" 147 | 148 | 149 | let load ?(allow_empty=false) ?(filename=default_filename) () = 150 | if Sys.file_exists filename then 151 | begin 152 | let chn = 153 | open_in_bin filename 154 | in 155 | let st = 156 | Stream.of_channel chn 157 | in 158 | let line = 159 | ref 1 160 | in 161 | let st_line = 162 | Stream.from 163 | (fun _ -> 164 | try 165 | match Stream.next st with 166 | | '\n' -> incr line; Some '\n' 167 | | c -> Some c 168 | with Stream.Failure -> None) 169 | 
in 170 | let lexer = 171 | Genlex.make_lexer ["="] st_line 172 | in 173 | let rec read_file mp = 174 | match Stream.npeek 3 lexer with 175 | | [Genlex.Ident nm; Genlex.Kwd "="; Genlex.String value] -> 176 | Stream.junk lexer; 177 | Stream.junk lexer; 178 | Stream.junk lexer; 179 | read_file (MapString.add nm value mp) 180 | | [] -> 181 | mp 182 | | _ -> 183 | failwith 184 | (Printf.sprintf 185 | "Malformed data file '%s' line %d" 186 | filename !line) 187 | in 188 | let mp = 189 | read_file MapString.empty 190 | in 191 | close_in chn; 192 | mp 193 | end 194 | else if allow_empty then 195 | begin 196 | MapString.empty 197 | end 198 | else 199 | begin 200 | failwith 201 | (Printf.sprintf 202 | "Unable to load environment, the file '%s' doesn't exist." 203 | filename) 204 | end 205 | 206 | 207 | let rec var_expand str env = 208 | let buff = 209 | Buffer.create ((String.length str) * 2) 210 | in 211 | Buffer.add_substitute 212 | buff 213 | (fun var -> 214 | try 215 | var_expand (MapString.find var env) env 216 | with Not_found -> 217 | failwith 218 | (Printf.sprintf 219 | "No variable %s defined when trying to expand %S." 220 | var 221 | str)) 222 | str; 223 | Buffer.contents buff 224 | 225 | 226 | let var_get name env = 227 | var_expand (MapString.find name env) env 228 | 229 | 230 | let var_choose lst env = 231 | OASISExpr.choose 232 | (fun nm -> var_get nm env) 233 | lst 234 | end 235 | 236 | 237 | # 237 "myocamlbuild.ml" 238 | module MyOCamlbuildFindlib = struct 239 | (* # 22 "src/plugins/ocamlbuild/MyOCamlbuildFindlib.ml" *) 240 | 241 | 242 | (** OCamlbuild extension, copied from 243 | * http://brion.inria.fr/gallium/index.php/Using_ocamlfind_with_ocamlbuild 244 | * by N. 
Pouillard and others 245 | * 246 | * Updated on 2009/02/28 247 | * 248 | * Modified by Sylvain Le Gall 249 | *) 250 | open Ocamlbuild_plugin 251 | 252 | type conf = 253 | { no_automatic_syntax: bool; 254 | } 255 | 256 | (* these functions are not really officially exported *) 257 | let run_and_read = 258 | Ocamlbuild_pack.My_unix.run_and_read 259 | 260 | 261 | let blank_sep_strings = 262 | Ocamlbuild_pack.Lexers.blank_sep_strings 263 | 264 | 265 | let exec_from_conf exec = 266 | let exec = 267 | let env_filename = Pathname.basename BaseEnvLight.default_filename in 268 | let env = BaseEnvLight.load ~filename:env_filename ~allow_empty:true () in 269 | try 270 | BaseEnvLight.var_get exec env 271 | with Not_found -> 272 | Printf.eprintf "W: Cannot get variable %s\n" exec; 273 | exec 274 | in 275 | let fix_win32 str = 276 | if Sys.os_type = "Win32" then begin 277 | let buff = Buffer.create (String.length str) in 278 | (* Adapt for windowsi, ocamlbuild + win32 has a hard time to handle '\\'. 279 | *) 280 | String.iter 281 | (fun c -> Buffer.add_char buff (if c = '\\' then '/' else c)) 282 | str; 283 | Buffer.contents buff 284 | end else begin 285 | str 286 | end 287 | in 288 | fix_win32 exec 289 | 290 | let split s ch = 291 | let buf = Buffer.create 13 in 292 | let x = ref [] in 293 | let flush () = 294 | x := (Buffer.contents buf) :: !x; 295 | Buffer.clear buf 296 | in 297 | String.iter 298 | (fun c -> 299 | if c = ch then 300 | flush () 301 | else 302 | Buffer.add_char buf c) 303 | s; 304 | flush (); 305 | List.rev !x 306 | 307 | 308 | let split_nl s = split s '\n' 309 | 310 | 311 | let before_space s = 312 | try 313 | String.before s (String.index s ' ') 314 | with Not_found -> s 315 | 316 | (* ocamlfind command *) 317 | let ocamlfind x = S[Sh (exec_from_conf "ocamlfind"); x] 318 | 319 | (* This lists all supported packages. 
*) 320 | let find_packages () = 321 | List.map before_space (split_nl & run_and_read (exec_from_conf "ocamlfind" ^ " list")) 322 | 323 | 324 | (* Mock to list available syntaxes. *) 325 | let find_syntaxes () = ["camlp4o"; "camlp4r"] 326 | 327 | 328 | let well_known_syntax = [ 329 | "camlp4.quotations.o"; 330 | "camlp4.quotations.r"; 331 | "camlp4.exceptiontracer"; 332 | "camlp4.extend"; 333 | "camlp4.foldgenerator"; 334 | "camlp4.listcomprehension"; 335 | "camlp4.locationstripper"; 336 | "camlp4.macro"; 337 | "camlp4.mapgenerator"; 338 | "camlp4.metagenerator"; 339 | "camlp4.profiler"; 340 | "camlp4.tracer" 341 | ] 342 | 343 | 344 | let dispatch conf = 345 | function 346 | | After_options -> 347 | (* By using Before_options one let command line options have an higher 348 | * priority on the contrary using After_options will guarantee to have 349 | * the higher priority override default commands by ocamlfind ones *) 350 | Options.ocamlc := ocamlfind & A"ocamlc"; 351 | Options.ocamlopt := ocamlfind & A"ocamlopt"; 352 | Options.ocamldep := ocamlfind & A"ocamldep"; 353 | Options.ocamldoc := ocamlfind & A"ocamldoc"; 354 | Options.ocamlmktop := ocamlfind & A"ocamlmktop"; 355 | Options.ocamlmklib := ocamlfind & A"ocamlmklib" 356 | 357 | | After_rules -> 358 | 359 | (* When one link an OCaml library/binary/package, one should use 360 | * -linkpkg *) 361 | flag ["ocaml"; "link"; "program"] & A"-linkpkg"; 362 | 363 | if not (conf.no_automatic_syntax) then begin 364 | (* For each ocamlfind package one inject the -package option when 365 | * compiling, computing dependencies, generating documentation and 366 | * linking. *) 367 | List.iter 368 | begin fun pkg -> 369 | let base_args = [A"-package"; A pkg] in 370 | (* TODO: consider how to really choose camlp4o or camlp4r. *) 371 | let syn_args = [A"-syntax"; A "camlp4o"] in 372 | let (args, pargs) = 373 | (* Heuristic to identify syntax extensions: whether they end in 374 | ".syntax"; some might not. 
375 | *) 376 | if Filename.check_suffix pkg "syntax" || 377 | List.mem pkg well_known_syntax then 378 | (syn_args @ base_args, syn_args) 379 | else 380 | (base_args, []) 381 | in 382 | flag ["ocaml"; "compile"; "pkg_"^pkg] & S args; 383 | flag ["ocaml"; "ocamldep"; "pkg_"^pkg] & S args; 384 | flag ["ocaml"; "doc"; "pkg_"^pkg] & S args; 385 | flag ["ocaml"; "link"; "pkg_"^pkg] & S base_args; 386 | flag ["ocaml"; "infer_interface"; "pkg_"^pkg] & S args; 387 | 388 | (* TODO: Check if this is allowed for OCaml < 3.12.1 *) 389 | flag ["ocaml"; "compile"; "package("^pkg^")"] & S pargs; 390 | flag ["ocaml"; "ocamldep"; "package("^pkg^")"] & S pargs; 391 | flag ["ocaml"; "doc"; "package("^pkg^")"] & S pargs; 392 | flag ["ocaml"; "infer_interface"; "package("^pkg^")"] & S pargs; 393 | end 394 | (find_packages ()); 395 | end; 396 | 397 | (* Like -package but for extensions syntax. Morover -syntax is useless 398 | * when linking. *) 399 | List.iter begin fun syntax -> 400 | flag ["ocaml"; "compile"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; 401 | flag ["ocaml"; "ocamldep"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; 402 | flag ["ocaml"; "doc"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; 403 | flag ["ocaml"; "infer_interface"; "syntax_"^syntax] & 404 | S[A"-syntax"; A syntax]; 405 | end (find_syntaxes ()); 406 | 407 | (* The default "thread" tag is not compatible with ocamlfind. 408 | * Indeed, the default rules add the "threads.cma" or "threads.cmxa" 409 | * options when using this tag. When using the "-linkpkg" option with 410 | * ocamlfind, this module will then be added twice on the command line. 411 | * 412 | * To solve this, one approach is to add the "-thread" option when using 413 | * the "threads" package using the previous plugin. 
414 | *) 415 | flag ["ocaml"; "pkg_threads"; "compile"] (S[A "-thread"]); 416 | flag ["ocaml"; "pkg_threads"; "doc"] (S[A "-I"; A "+threads"]); 417 | flag ["ocaml"; "pkg_threads"; "link"] (S[A "-thread"]); 418 | flag ["ocaml"; "pkg_threads"; "infer_interface"] (S[A "-thread"]); 419 | flag ["ocaml"; "package(threads)"; "compile"] (S[A "-thread"]); 420 | flag ["ocaml"; "package(threads)"; "doc"] (S[A "-I"; A "+threads"]); 421 | flag ["ocaml"; "package(threads)"; "link"] (S[A "-thread"]); 422 | flag ["ocaml"; "package(threads)"; "infer_interface"] (S[A "-thread"]); 423 | 424 | | _ -> 425 | () 426 | end 427 | 428 | module MyOCamlbuildBase = struct 429 | (* # 22 "src/plugins/ocamlbuild/MyOCamlbuildBase.ml" *) 430 | 431 | 432 | (** Base functions for writing myocamlbuild.ml 433 | @author Sylvain Le Gall 434 | *) 435 | 436 | 437 | 438 | 439 | 440 | open Ocamlbuild_plugin 441 | module OC = Ocamlbuild_pack.Ocaml_compiler 442 | 443 | 444 | type dir = string 445 | type file = string 446 | type name = string 447 | type tag = string 448 | 449 | 450 | (* # 62 "src/plugins/ocamlbuild/MyOCamlbuildBase.ml" *) 451 | 452 | 453 | type t = 454 | { 455 | lib_ocaml: (name * dir list * string list) list; 456 | lib_c: (name * dir * file list) list; 457 | flags: (tag list * (spec OASISExpr.choices)) list; 458 | (* Replace the 'dir: include' from _tags by a precise interdepends in 459 | * directory. 
460 | *) 461 | includes: (dir * dir list) list; 462 | } 463 | 464 | 465 | let env_filename = 466 | Pathname.basename 467 | BaseEnvLight.default_filename 468 | 469 | 470 | let dispatch_combine lst = 471 | fun e -> 472 | List.iter 473 | (fun dispatch -> dispatch e) 474 | lst 475 | 476 | 477 | let tag_libstubs nm = 478 | "use_lib"^nm^"_stubs" 479 | 480 | 481 | let nm_libstubs nm = 482 | nm^"_stubs" 483 | 484 | 485 | let dispatch t e = 486 | let env = 487 | BaseEnvLight.load 488 | ~filename:env_filename 489 | ~allow_empty:true 490 | () 491 | in 492 | match e with 493 | | Before_options -> 494 | let no_trailing_dot s = 495 | if String.length s >= 1 && s.[0] = '.' then 496 | String.sub s 1 ((String.length s) - 1) 497 | else 498 | s 499 | in 500 | List.iter 501 | (fun (opt, var) -> 502 | try 503 | opt := no_trailing_dot (BaseEnvLight.var_get var env) 504 | with Not_found -> 505 | Printf.eprintf "W: Cannot get variable %s\n" var) 506 | [ 507 | Options.ext_obj, "ext_obj"; 508 | Options.ext_lib, "ext_lib"; 509 | Options.ext_dll, "ext_dll"; 510 | ] 511 | 512 | | After_rules -> 513 | (* Declare OCaml libraries *) 514 | List.iter 515 | (function 516 | | nm, [], intf_modules -> 517 | ocaml_lib nm; 518 | let cmis = 519 | List.map (fun m -> (String.uncapitalize m) ^ ".cmi") 520 | intf_modules in 521 | dep ["ocaml"; "link"; "library"; "file:"^nm^".cma"] cmis 522 | | nm, dir :: tl, intf_modules -> 523 | ocaml_lib ~dir:dir (dir^"/"^nm); 524 | List.iter 525 | (fun dir -> 526 | List.iter 527 | (fun str -> 528 | flag ["ocaml"; "use_"^nm; str] (S[A"-I"; P dir])) 529 | ["compile"; "infer_interface"; "doc"]) 530 | tl; 531 | let cmis = 532 | List.map (fun m -> dir^"/"^(String.uncapitalize m)^".cmi") 533 | intf_modules in 534 | dep ["ocaml"; "link"; "library"; "file:"^dir^"/"^nm^".cma"] 535 | cmis) 536 | t.lib_ocaml; 537 | 538 | (* Declare directories dependencies, replace "include" in _tags. 
*) 539 | List.iter 540 | (fun (dir, include_dirs) -> 541 | Pathname.define_context dir include_dirs) 542 | t.includes; 543 | 544 | (* Declare C libraries *) 545 | List.iter 546 | (fun (lib, dir, headers) -> 547 | (* Handle C part of library *) 548 | flag ["link"; "library"; "ocaml"; "byte"; tag_libstubs lib] 549 | (S[A"-dllib"; A("-l"^(nm_libstubs lib)); A"-cclib"; 550 | A("-l"^(nm_libstubs lib))]); 551 | 552 | flag ["link"; "library"; "ocaml"; "native"; tag_libstubs lib] 553 | (S[A"-cclib"; A("-l"^(nm_libstubs lib))]); 554 | 555 | flag ["link"; "program"; "ocaml"; "byte"; tag_libstubs lib] 556 | (S[A"-dllib"; A("dll"^(nm_libstubs lib))]); 557 | 558 | (* When ocaml link something that use the C library, then one 559 | need that file to be up to date. 560 | This holds both for programs and for libraries. 561 | *) 562 | dep ["link"; "ocaml"; tag_libstubs lib] 563 | [dir/"lib"^(nm_libstubs lib)^"."^(!Options.ext_lib)]; 564 | 565 | dep ["compile"; "ocaml"; tag_libstubs lib] 566 | [dir/"lib"^(nm_libstubs lib)^"."^(!Options.ext_lib)]; 567 | 568 | (* TODO: be more specific about what depends on headers *) 569 | (* Depends on .h files *) 570 | dep ["compile"; "c"] 571 | headers; 572 | 573 | (* Setup search path for lib *) 574 | flag ["link"; "ocaml"; "use_"^lib] 575 | (S[A"-I"; P(dir)]); 576 | ) 577 | t.lib_c; 578 | 579 | (* Add flags *) 580 | List.iter 581 | (fun (tags, cond_specs) -> 582 | let spec = BaseEnvLight.var_choose cond_specs env in 583 | let rec eval_specs = 584 | function 585 | | S lst -> S (List.map eval_specs lst) 586 | | A str -> A (BaseEnvLight.var_expand str env) 587 | | spec -> spec 588 | in 589 | flag tags & (eval_specs spec)) 590 | t.flags 591 | | _ -> 592 | () 593 | 594 | 595 | let dispatch_default conf t = 596 | dispatch_combine 597 | [ 598 | dispatch t; 599 | MyOCamlbuildFindlib.dispatch conf; 600 | ] 601 | 602 | 603 | end 604 | 605 | 606 | # 606 "myocamlbuild.ml" 607 | open Ocamlbuild_plugin;; 608 | let package_default = 609 | { 610 | 
MyOCamlbuildBase.lib_ocaml = [("pareto", ["lib"], [])]; 611 | lib_c = []; 612 | flags = 613 | [ 614 | (["oasis_library_pareto_byte"; "ocaml"; "link"; "byte"], 615 | [ 616 | (OASISExpr.EBool true, S []); 617 | (OASISExpr.EFlag "strict", 618 | S [A "-w"; A "@a"; A "-warn-error"; A "-a"]) 619 | ]); 620 | (["oasis_library_pareto_native"; "ocaml"; "link"; "native"], 621 | [ 622 | (OASISExpr.EBool true, S []); 623 | (OASISExpr.EFlag "strict", 624 | S [A "-w"; A "@a"; A "-warn-error"; A "-a"]) 625 | ]); 626 | (["oasis_library_pareto_byte"; "ocaml"; "ocamldep"; "byte"], 627 | [ 628 | (OASISExpr.EBool true, S []); 629 | (OASISExpr.EFlag "strict", 630 | S [A "-w"; A "@a"; A "-warn-error"; A "-a"]) 631 | ]); 632 | (["oasis_library_pareto_native"; "ocaml"; "ocamldep"; "native"], 633 | [ 634 | (OASISExpr.EBool true, S []); 635 | (OASISExpr.EFlag "strict", 636 | S [A "-w"; A "@a"; A "-warn-error"; A "-a"]) 637 | ]); 638 | (["oasis_library_pareto_byte"; "ocaml"; "compile"; "byte"], 639 | [ 640 | (OASISExpr.EBool true, S []); 641 | (OASISExpr.EFlag "strict", 642 | S [A "-w"; A "@a"; A "-warn-error"; A "-a"]) 643 | ]); 644 | (["oasis_library_pareto_native"; "ocaml"; "compile"; "native"], 645 | [ 646 | (OASISExpr.EBool true, S []); 647 | (OASISExpr.EFlag "strict", 648 | S [A "-w"; A "@a"; A "-warn-error"; A "-a"]) 649 | ]); 650 | (["oasis_executable_base_ex_byte"; "ocaml"; "link"; "byte"], 651 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 652 | (["oasis_executable_base_ex_native"; "ocaml"; "link"; "native"], 653 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 654 | (["oasis_executable_base_ex_byte"; "ocaml"; "ocamldep"; "byte"], 655 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 656 | (["oasis_executable_base_ex_native"; "ocaml"; "ocamldep"; "native"], 657 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 658 | (["oasis_executable_base_ex_byte"; "ocaml"; "compile"; "byte"], 659 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 660 | (["oasis_executable_base_ex_native"; "ocaml"; 
"compile"; "native"], 661 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 662 | (["oasis_executable_distributions_ex_byte"; "ocaml"; "link"; "byte" 663 | ], 664 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 665 | ([ 666 | "oasis_executable_distributions_ex_native"; 667 | "ocaml"; 668 | "link"; 669 | "native" 670 | ], 671 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 672 | ([ 673 | "oasis_executable_distributions_ex_byte"; 674 | "ocaml"; 675 | "ocamldep"; 676 | "byte" 677 | ], 678 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 679 | ([ 680 | "oasis_executable_distributions_ex_native"; 681 | "ocaml"; 682 | "ocamldep"; 683 | "native" 684 | ], 685 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 686 | ([ 687 | "oasis_executable_distributions_ex_byte"; 688 | "ocaml"; 689 | "compile"; 690 | "byte" 691 | ], 692 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 693 | ([ 694 | "oasis_executable_distributions_ex_native"; 695 | "ocaml"; 696 | "compile"; 697 | "native" 698 | ], 699 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 700 | (["oasis_executable_tests_ex_byte"; "ocaml"; "link"; "byte"], 701 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 702 | (["oasis_executable_tests_ex_native"; "ocaml"; "link"; "native"], 703 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 704 | (["oasis_executable_tests_ex_byte"; "ocaml"; "ocamldep"; "byte"], 705 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 706 | (["oasis_executable_tests_ex_native"; "ocaml"; "ocamldep"; "native" 707 | ], 708 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 709 | (["oasis_executable_tests_ex_byte"; "ocaml"; "compile"; "byte"], 710 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 711 | (["oasis_executable_tests_ex_native"; "ocaml"; "compile"; "native"], 712 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 713 | (["oasis_executable_sample_ex_byte"; "ocaml"; "link"; "byte"], 714 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 715 | (["oasis_executable_sample_ex_native"; "ocaml"; "link"; "native"], 716 
| [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 717 | (["oasis_executable_sample_ex_byte"; "ocaml"; "ocamldep"; "byte"], 718 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 719 | ([ 720 | "oasis_executable_sample_ex_native"; 721 | "ocaml"; 722 | "ocamldep"; 723 | "native" 724 | ], 725 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 726 | (["oasis_executable_sample_ex_byte"; "ocaml"; "compile"; "byte"], 727 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 728 | (["oasis_executable_sample_ex_native"; "ocaml"; "compile"; "native" 729 | ], 730 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 731 | (["oasis_executable_resampling_ex_byte"; "ocaml"; "link"; "byte"], 732 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 733 | ([ 734 | "oasis_executable_resampling_ex_native"; 735 | "ocaml"; 736 | "link"; 737 | "native" 738 | ], 739 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 740 | ([ 741 | "oasis_executable_resampling_ex_byte"; 742 | "ocaml"; 743 | "ocamldep"; 744 | "byte" 745 | ], 746 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 747 | ([ 748 | "oasis_executable_resampling_ex_native"; 749 | "ocaml"; 750 | "ocamldep"; 751 | "native" 752 | ], 753 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 754 | (["oasis_executable_resampling_ex_byte"; "ocaml"; "compile"; "byte" 755 | ], 756 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]); 757 | ([ 758 | "oasis_executable_resampling_ex_native"; 759 | "ocaml"; 760 | "compile"; 761 | "native" 762 | ], 763 | [(OASISExpr.EBool true, S [A "-w"; A "@a"])]) 764 | ]; 765 | includes = [("lib_test", ["lib"]); ("examples", ["lib"])] 766 | } 767 | ;; 768 | 769 | let conf = {MyOCamlbuildFindlib.no_automatic_syntax = false} 770 | 771 | let dispatch_default = MyOCamlbuildBase.dispatch_default conf package_default;; 772 | 773 | # 774 "myocamlbuild.ml" 774 | (* OASIS_STOP *) 775 | Ocamlbuild_plugin.dispatch dispatch_default;; 776 | -------------------------------------------------------------------------------- /opam: 
-------------------------------------------------------------------------------- 1 | opam-version: "1" 2 | maintainer: "superbobry@gmail.com" 3 | build: [ 4 | ["./configure" "--prefix" "%{prefix}%"] 5 | [make] 6 | [make "install"] 7 | ] 8 | remove: [ 9 | ["ocamlfind" "remove" "pareto"] 10 | ] 11 | depends: ["ocamlfind" "gsl" {>= "1.13.0"}] 12 | ocaml-version: [>= "4.00.1"] 13 | 14 | homepage: "https://github.com/superbobry/pareto" 15 | license: "MIT" 16 | authors: ["Sergei Lebedev"] 17 | -------------------------------------------------------------------------------- /setup.ml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/superbobry/pareto/8b3b27bce7b7df5d9713d16ed40a844861aa368e/setup.ml --------------------------------------------------------------------------------