├── .formatter.exs
├── .github
    └── workflows
    │   └── elixir.yml
├── .gitignore
├── LICENSE
├── README.md
├── lib
    ├── math
    │   ├── functions.ex
    │   └── math.ex
    ├── statistics.ex
    └── statistics
    │   ├── distributions
    │       ├── beta.ex
    │       ├── binomial.ex
    │       ├── chisq.ex
    │       ├── exponential.ex
    │       ├── f.ex
    │       ├── hypergeometric.ex
    │       ├── normal.ex
    │       ├── poisson.ex
    │       └── t.ex
    │   └── tests
    │       └── t_test.ex
├── mix.exs
├── package.exs
└── test
    ├── beta_distribution_test.exs
    ├── binomial_distribution_test.exs
    ├── chisq_distribution_test.exs
    ├── descriptive_test.exs
    ├── exponential_distribution_test.exs
    ├── f_distribution_test.exs
    ├── hypergeometric_distribution_test.exs
    ├── math_functions_test.exs
    ├── math_utils_test.exs
    ├── normal_distribution_test.exs
    ├── poisson_distribution_test.exs
    ├── t_distribution_test.exs
    ├── t_test_test.exs
    └── test_helper.exs


/.formatter.exs:
--------------------------------------------------------------------------------
1 | # Used by "mix format"
2 | [
3 |   inputs: ["mix.exs", "{config,lib,test}/**/*.{ex,exs}"]
4 | ]
5 | 


--------------------------------------------------------------------------------
/.github/workflows/elixir.yml:
--------------------------------------------------------------------------------
 1 | # This workflow uses actions that are not certified by GitHub.
 2 | # They are provided by a third-party and are governed by
 3 | # separate terms of service, privacy policy, and support
 4 | # documentation.
 5 | 
 6 | name: Elixir CI
 7 | 
 8 | on:
 9 |   push:
10 |     branches: [ "master" ]
11 |   pull_request:
12 |     branches: [ "master" ]
13 | 
14 | permissions:
15 |   contents: read
16 | 
17 | jobs:
18 |   build:
19 | 
20 |     name: Build and test
21 |     runs-on: ubuntu-latest
22 | 
23 |     steps:
24 |     - uses: actions/checkout@v3
25 |     - name: Set up Elixir
26 |       uses: erlef/setup-beam@61e01a43a562a89bfc54c7f9a378ff67b03e4a21 # v1.16.0
27 |       with:
28 |         elixir-version: '1.15.2' # [Required] Define the Elixir version
29 |         otp-version: '26.0'      # [Required] Define the Erlang/OTP version
30 |     - name: Restore dependencies cache
31 |       uses: actions/cache@v3
32 |       with:
33 |         path: deps
34 |         key: ${{ runner.os }}-mix-${{ hashFiles('**/mix.lock') }}
35 |         restore-keys: ${{ runner.os }}-mix-
36 |     - name: Install dependencies
37 |       run: mix deps.get
38 |     - name: Run tests
39 |       run: mix test
40 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | /ebin
 2 | /deps
 3 | erl_crash.dump
 4 | *.ez
 5 | /_build
 6 | /doc
 7 | mix.lock
 8 | TODO
 9 | *.txt
10 | priv/
11 | .idea/
12 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014 Max Sharples
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 |     http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | #  Statistics
 2 | 
 3 | [![Build Status](https://github.com/msharp/elixir-statistics/actions/workflows/elixir.yml/badge.svg)](https://github.com/msharp/elixir-statistics/actions/workflows/elixir.yml)
 4 | [![hex.pm version](https://img.shields.io/hexpm/v/statistics.svg?style=flat)](https://hex.pm/packages/statistics)
 5 | 
 6 | Statistics functions and distributions for [Elixir](https://github.com/elixir-lang/elixir).
 7 | 
 8 | ## Usage
 9 | 
10 | Add Statistics as a dependency in your `mix.exs` file to install from [hex.pm](https://hex.pm).
11 | 
12 | ```elixir
13 | def deps do
14 |   [
15 |     { :statistics, "~> 0.6"}
16 |   ]
17 | end
18 | ```
19 | 
20 | After you are done, run `mix deps.get` in your shell to fetch and compile Statistics.
21 | 
22 | To try it out, start an interactive Elixir shell with `iex -S mix`.
23 | 
24 | Get the median value from a list
25 | 
26 | ```
27 | iex> Statistics.median([1,2,3])
28 | 2
29 | ```
30 | 
31 | Calculate the variance of a list of values.
32 | 
33 | ```
34 | iex> Statistics.variance([1,2,3,4])
35 | 1.25
36 | ```
37 | 
38 | Or draw a random number from a Gaussian distribution with a mean of 1 and standard deviation of 2.
39 | 
40 | ```
41 | iex> Statistics.Distributions.Normal.rand(1, 2)
42 | 2.5998185179627384
43 | ```
44 | 
45 | ## Documentation
46 | 
47 | Elixir has great documentation tools using `ex_doc`.
48 | 
49 | The docs are hosted on [hexdocs.pm/statistics](http://hexdocs.pm/statistics/).
50 | 
51 | ## Performance
52 | 
53 | This is not a library to use if you need fast computation.
54 | 
55 | Everything is implemented in Elixir. Many of the implementations use slow approximations, numerical function integration, or trial-and-error methods.
56 | 
57 | There is much room for improvement. To make this library really fast (and precise), we would probably need to interface with existing C libraries.
58 | 
59 | ## Contributing
60 | 
61 | I will accept pull requests.
62 | 
63 | If you want to contribute, please create a topic branch with tests and submit a pull request.
64 | 
65 | ## License
66 | 
67 | Apache 2
68 | 


--------------------------------------------------------------------------------
/lib/math/functions.ex:
--------------------------------------------------------------------------------
  1 | defmodule Statistics.Math.Functions do
  2 |   alias Statistics.Math
  3 | 
  4 |   @doc """
  5 |   The Gamma function
  6 | 
  7 |   This implementation uses the [Lanczos approximation](http://en.wikipedia.org/wiki/Lanczos_approximation)
  8 | 
  9 |   ## Examples
 10 | 
 11 |       iex> Statistics.Math.Functions.gamma(0.5)
 12 |       1.7724538509055159
 13 | 
 14 |   """
 15 |   @spec gamma(number) :: number
 16 |   def gamma(x) do
 17 |     gamma_lanczos(x)
 18 |     # gamma_taylor(x)
 19 |   end
 20 | 
 21 |   # Lanczos approximation of the Gamma function. Arguments below 0.5 are
 22 |   # handled through Gamma(x) = pi / (sin(pi * x) * Gamma(1 - x)).
 23 |   defp gamma_lanczos(x) when x < 0.5 do
 24 |     Math.pi() / (:math.sin(Math.pi() * x) * gamma_lanczos(1 - x))
 25 |   end
 26 | 
 27 |   defp gamma_lanczos(x) do
 28 |     # coefficients used by the GNU Scientific Library
 29 |     g = 7
 30 | 
 31 |     [c0 | rest] = [
 32 |       0.99999999999980993,
 33 |       676.5203681218851,
 34 |       -1259.1392167224028,
 35 |       771.32342877765313,
 36 |       -176.61502916214059,
 37 |       12.507343278686905,
 38 |       -0.13857109526572012,
 39 |       9.9843695780195716e-6,
 40 |       1.5056327351493116e-7
 41 |     ]
 42 | 
 43 |     z = x - 1
 44 |     tail = rest |> Enum.with_index(1) |> Enum.map(fn {c, i} -> c / (z + i) end) |> Enum.sum()
 45 |     t = z + g + 0.5
 46 |     Math.sqrt(2 * Math.pi()) * Math.pow(t, z + 0.5) * Math.exp(-1 * t) * (c0 + tail)
 47 |   end
 48 | 
 49 |   @doc """
 50 |   The Beta function
 51 | 
 52 |   ## Examples
 53 | 
 54 |       iex> Statistics.Math.Functions.beta(2, 0.5)
 55 |       1.3333333333333324
 56 | 
 57 |   """
 58 |   @spec beta(number, number) :: number
 59 |   def beta(x, y) do
 60 |     # from https://en.wikipedia.org/wiki/Beta_function#Properties
 61 |     gamma(x) * gamma(y) / gamma(x + y)
 62 |   end
 63 | 
 64 |   @doc """
 65 |   The 'error' function
 66 | 
 67 |   Formula 7.1.26 given in Abramowitz and Stegun.
 68 |   Formula appears as 1 - (a1*t + a2*t^2 + a3*t^3 + a4*t^4 + a5*t^5) * exp(-x^2),
 69 |   where t = 1 / (1 + p*x). It is evaluated for |x| and the sign of x is reapplied.
 70 |   """
 71 |   # The polynomial in t is evaluated with Horner's method: instead of
 72 |   # a1*t + a2*t*t + a3*t*t*t + ... it is folded as
 73 |   # ((((a5*t + a4)*t + a3)*t + a2)*t + a1)*t, one multiplication per
 74 |   # coefficient, which brings the work down from O(n^2) to O(n).
 75 |   @spec erf(number) :: number
 76 |   def erf(x) do
 77 |     p = 0.3275911
 78 |     # coefficients a5 down to a1, highest power first for the Horner fold
 79 |     [lead | lower] = [1.061405429, -1.453152027, 1.421413741, -0.284496736, 0.254829592]
 80 | 
 81 |     # the approximation is applied to |x|; the sign is put back at the end
 82 |     sign = if x < 0, do: -1, else: 1
 83 |     magnitude = abs(x)
 84 | 
 85 |     t = 1.0 / (1.0 + p * magnitude)
 86 |     horner = Enum.reduce(lower, lead, fn coeff, acc -> acc * t + coeff end)
 87 |     y = 1.0 - horner * t * Math.pow(Math.e(), -magnitude * magnitude)
 88 | 
 89 |     sign * y
 90 |   end
 91 | 
 92 |   @doc """
 93 |   The  inverse 'error' function
 94 |   """
 95 |   @spec inv_erf(number) :: number
 96 |   def inv_erf(x) do
 97 |     # constants
 98 |     {c0, c1, c2} = {2.515517, 0.802853, 0.010328}
 99 |     {d0, d1, d2} = {1.432788, 0.189269, 0.001308}
100 |     # formula
101 |     x - ((c2 * x + c1) * x + c0) / (((d2 * x + d1) * x + d0) * x + 1.0)
102 |   end
103 | 
104 |   @doc """
105 |   Lower incomplete Gamma function
106 | 
107 |   ## Examples
108 | 
109 |       iex> Statistics.Math.Functions.gammainc(1,1)
110 |       0.63212055882855778
111 | 
112 |   """
113 |   # ############################
114 |   # this simple approach adapted from
115 |   # http://www.dreamincode.net/forums/topic/12775-statistical-functions/
116 |   #
117 |   # there are alternate implementation strategies to try,
118 |   # for examples, see:
119 |   #
120 |   #   : https://mail.python.org/pipermail/python-list/2001-April/092498.html
121 |   #   : http://www.dreamincode.net/forums/topic/12775-statistical-functions/
122 |   #   : http://www.crbond.com/math.htm
123 |   #
124 |   # ###########################
125 |   @spec gammainc(number, number) :: number
126 |   def gammainc(a, x) do
127 |     series = gammainc_sum(a, x, 1 / a, 0, 1)
128 |     Math.pow(x, a) * Math.exp(-x) * series
129 |   end
130 | 
131 |   # Sums the series term by term and stops once the current term is exactly
132 |   # zero; `term` is the term about to be added, `total` the sum so far.
133 |   defp gammainc_sum(_a, _x, term, total, _k) when term == 0.0, do: total
134 | 
135 |   defp gammainc_sum(a, x, term, total, k) do
136 |     next_term = term * (x / (a + k))
137 |     gammainc_sum(a, x, next_term, total + term, k + 1)
138 |   end
139 | 
140 |   @doc """
141 |   Gauss hypergeometric function 2F1(a, b; c; x), summed as a truncated series
142 | 
143 |   WARNING: the implementation is incomplete, and should not be used
144 | 
145 |   Computes `gamma(c) / (gamma(a) * gamma(b))` times the sum over n = 0..50 of
146 |   `gamma(a + n) * gamma(b + n) / gamma(c + n) * x^n / n!`.
147 |   """
148 |   # from http://mhtlab.uwaterloo.ca/courses/me755/web_chap7.pdf
149 |   @spec hyp2f1(number, number, number, number) :: number
150 |   def hyp2f1(a, b, c, x) do
151 |     # Both gammas belong in the denominator; unparenthesised, `/` and `*`
152 |     # associate left and gamma(b) ends up multiplied in instead.
153 |     prefactor = gamma(c) / (gamma(a) * gamma(b))
154 |     prefactor * hyp2f1_cont(a, b, c, x, 0, 0)
155 |   end
156 | 
157 |   # Adds up the series terms for n = 0..50 in `acc` and returns the total.
158 |   defp hyp2f1_cont(_, _, _, _, n, acc) when n > 50 do
159 |     acc
160 |   end
161 | 
162 |   defp hyp2f1_cont(a, b, c, x, n, acc) do
163 |     s = gamma(a + n) * gamma(b + n) / gamma(c + n)
164 |     p = Math.pow(x, n) / Math.factorial(n)
165 |     hyp2f1_cont(a, b, c, x, n + 1, acc + s * p)
166 |   end
167 | 
168 |   @doc """
169 |   Simpson's rule for numerical integration of `f` over the interval from `a` to `b`
170 | 
171 |   `n` is the number of sub-intervals; the rule assumes it is even.
172 |   see: http://en.wikipedia.org/wiki/Simpson's_rule
173 | 
174 |   ## Examples
175 | 
176 |       iex> Statistics.Math.Functions.simpson(fn x -> x*x*x end, 0, 20, 100000)
177 |       40000.00000000011
178 |   """
179 |   @spec simpson(fun, number, number, number) :: number
180 |   def simpson(f, a, b, n) do
181 |     h = (b - a) / n
182 |     s1 = f.(a) + f.(b)
183 | 
184 |     # odd interior points, weight 4
185 |     s2 =
186 |       Stream.take_every(1..(n - 1), 2)
187 |       |> Enum.map(fn i -> 4 * f.(a + i * h) end)
188 |       |> Enum.sum()
189 | 
190 |     # Even interior points, weight 2. There are none when n <= 2, where
191 |     # `2..(n - 2)` would count downwards and sample the endpoints again.
192 |     evens = if n > 2, do: Stream.take_every(2..(n - 2), 2), else: []
193 |     s3 = evens |> Enum.map(fn i -> 2 * f.(a + i * h) end) |> Enum.sum()
194 |     (s1 + s2 + s3) * h / 3
195 |   end
196 | end
197 | 


--------------------------------------------------------------------------------
/lib/math/math.ex:
--------------------------------------------------------------------------------
  1 | defmodule Statistics.Math do
  2 |   @e :math.exp(1)
  3 |   @pi :math.pi()
  4 | 
  5 |   @doc """
  6 |   Get square root
  7 | 
  8 |   return sqrt from Erlang
  9 | 
 10 |   ## Examples
 11 | 
 12 |       iex> Statistics.Math.sqrt(9)
 13 |       3.0
 14 |       iex> Statistics.Math.sqrt(99)
 15 |       9.9498743710662
 16 | 
 17 |   """
 18 |   @spec sqrt(number) :: number
 19 |   defdelegate sqrt(num), to: :math
 20 | 
 21 |   @doc """
 22 |   Raise `num` to the power `pow`, using Erlang's `:math.pow/2` for the general case
 23 | 
 24 |   An exponent of integer `0` gives the integer `1`, and an integer `0` base with a
 25 |   non-negative exponent gives the integer `0`; everything else returns a float.
 26 | 
 27 |   ## Examples
 28 | 
 29 |       iex> Statistics.Math.pow(2,3)
 30 |       8.0
 31 |       iex> Statistics.Math.pow(9,9)
 32 |       387420489.0
 33 |       iex> Statistics.Math.pow(2,0)
 34 |       1
 35 |       iex> Statistics.Math.pow(-2, 1.5)
 36 |       -2.8284271247461903
 37 |       iex> Statistics.Math.pow(0, 5)
 38 |       0
 39 | 
 40 |   """
 41 |   @spec pow(number, number) :: number
 42 |   def pow(_, 0), do: 1
 43 |   def pow(0, pow) when pow >= 0, do: 0
 44 | 
 45 |   # :math.pow/2 rejects a negative base with a non-integer exponent, so the
 46 |   # absolute value is raised and the result negated.
 47 |   # NOTE(review): integral float exponents take this path too, so pow(-2, 2.0) is -4.0.
 48 |   def pow(num, pow) when num < 0 and is_float(pow), do: :math.pow(-num, pow) * -1
 49 |   # every remaining case is handed to Erlang unchanged
 50 |   def pow(num, pow), do: :math.pow(num, pow)
 51 | 
 52 |   @doc """
 53 |   The constant *e*
 54 | 
 55 |   ## Examples
 56 | 
 57 |       iex> Statistics.Math.e
 58 |       2.718281828459045
 59 | 
 60 |   """
 61 |   @spec e() :: number
 62 |   def e do
 63 |     @e
 64 |   end
 65 | 
 66 |   @doc """
 67 |   The constant *pi*
 68 | 
 69 |   (returned from Erlang Math module)
 70 | 
 71 |   ## Examples
 72 | 
 73 |       iex> Statistics.Math.pi
 74 |       3.141592653589793
 75 | 
 76 |   """
 77 |   @spec pi() :: number
 78 |   def pi do
 79 |     @pi
 80 |   end
 81 | 
 82 |   @doc """
 83 |   The natural log
 84 | 
 85 |   ( from Erlang Math module)
 86 | 
 87 |   ## Examples
 88 | 
 89 |       iex> Statistics.Math.ln(20)
 90 |       2.995732273553991
 91 |       iex> Statistics.Math.ln(200)
 92 |       5.298317366548036
 93 | 
 94 |   """
 95 |   @spec ln(number) :: number
 96 |   defdelegate ln(i), to: :math, as: :log
 97 | 
 98 |   @doc """
 99 |   Exponent function
100 | 
101 |   Raise *e* to given power
102 | 
103 |   ## Examples
104 | 
105 |       iex> Statistics.Math.exp(5.6)
106 |       270.42640742615254
107 | 
108 |   """
109 |   @spec exp(number) :: number
110 |   defdelegate exp(x), to: :math
111 | 
112 |   @doc """
113 |   Get a random number from erlang
114 |   """
115 |   @spec rand() :: number
116 |   defdelegate rand(), to: :rand, as: :uniform
117 | 
118 |   @doc """
119 |   Round a decimal to a specific precision
120 | 
121 |   ## Examples
122 | 
123 |       iex> Statistics.Math.round(0.123456, 4)
124 |       0.1235
125 | 
126 |   """
127 |   @spec round(number, number) :: number
128 |   def round(x, precision) do
129 |     p = pow(10, precision)
130 |     :erlang.round(x * p) / p
131 |   end
132 | 
133 |   @doc """
134 |   Floor function
135 | 
136 |   Returns the largest whole number not greater than `x`, as a float.
137 | 
138 |   ## Examples
139 | 
140 |       iex> Statistics.Math.floor(3.999)
141 |       3.0
142 | 
143 |   """
144 |   @spec floor(number) :: number
145 |   def floor(x) do
146 |     # truncating rounds toward zero, one too high for negative non-integers
147 |     truncated = :erlang.trunc(x) * 1.0
148 |     if x - truncated < 0 do
149 |       truncated - 1
150 |     else
151 |       truncated
152 |     end
153 |   end
154 | 
155 |   @doc """
156 |   Ceiling function
157 | 
158 |   Returns the smallest whole number not less than `x`, as a float.
159 | 
160 |   ## Examples
161 | 
162 |       iex> Statistics.Math.ceil(3.999)
163 |       4.0
164 | 
165 |   """
166 |   @spec ceil(number) :: number
167 |   def ceil(x) do
168 |     # truncating rounds toward zero, one too low for positive non-integers
169 |     truncated = :erlang.trunc(x) * 1.0
170 |     if x - truncated > 0 do
171 |       truncated + 1
172 |     else
173 |       truncated
174 |     end
175 |   end
176 | 
177 |   @doc """
178 |   Get the absolute value of a number
179 | 
180 |   ## Examples
181 | 
182 |       iex> Statistics.Math.abs(-4)
183 |       4
184 | 
185 |   """
186 |   @spec abs(number) :: number
187 |   defdelegate abs(x), to: :erlang
188 | 
189 |   @doc """
190 |   Factorial!
191 | 
192 |   Returns 1 for 0 and 1, and raises `ArithmeticError` for a negative `n`.
193 |   """
194 |   @spec factorial(non_neg_integer) :: non_neg_integer
195 |   def factorial(n) when n < 0 do
196 |     raise ArithmeticError, message: "Argument n must be a positive number"
197 |   end
198 | 
199 |   def factorial(n) when n == 0 or n == 1, do: 1
200 | 
201 |   def factorial(n) do
202 |     # start from n and multiply in trunc(n) - 1, ..., 2, 1
203 |     descending = (to_int(n) - 1)..1
204 |     Enum.reduce(descending, n, fn factor, product -> factor * product end)
205 |   end
206 | 
207 |   @doc """
208 |   Get the base integer from a float
209 | 
210 |   ## Examples
211 | 
212 |       iex> Statistics.Math.to_int(66.6666)
213 |       66
214 | 
215 |   """
216 |   @spec to_int(number) :: integer
217 |   defdelegate to_int(f), to: :erlang, as: :trunc
218 | 
219 |   @doc """
220 |   The number of k combinations of n
221 | 
222 |   Both arguments must be integers greater than zero. `k` may not be larger than `n`.
223 | 
224 |   ## Examples
225 | 
226 |       iex> Statistics.Math.combination(10, 3)
227 |       120
228 | 
229 |   """
230 |   @spec combination(non_neg_integer, non_neg_integer) :: non_neg_integer
231 |   def combination(n, k) when k > n, do: 0
232 |   def combination(n, _) when n < 0, do: 0
233 |   def combination(_, k) when k < 0, do: 0
234 | 
235 |   def combination(n, k) do
236 |     :erlang.div(factorial(n), factorial(k) * factorial(n - k))
237 |   end
238 | 
239 |   @doc """
240 |   The number of k permutations of n
241 | 
242 |   Both arguments are expected to be non-negative integers. Returns 0 when `k`
243 |   is larger than `n` or when either argument is negative.
244 | 
245 |   ## Examples
246 | 
247 |       iex> Statistics.Math.permutation(10, 3)
248 |       720
249 | 
250 |   """
251 |   @spec permutation(non_neg_integer, non_neg_integer) :: non_neg_integer
252 |   def permutation(n, k) when k > n, do: 0
253 |   def permutation(n, _) when n < 0, do: 0
254 |   # without this clause permutation(0, -1) evaluated to div(0!, 1!) = 1
255 |   def permutation(_, k) when k < 0, do: 0
256 |   def permutation(n, k), do: :erlang.div(factorial(n), factorial(n - k))
257 | end
258 | 


--------------------------------------------------------------------------------
/lib/statistics.ex:
--------------------------------------------------------------------------------
  1 | defmodule Statistics do
  2 |   alias Statistics.Math
  3 | 
  4 |   @moduledoc """
  5 |   Descriptive statistics functions
  6 |   """
  7 | 
  8 |   @doc """
  9 |   Add up every number in `list`.
 10 | 
 11 |   The total starts at 0 and elements are added left to right; `[]` sums to 0.
 12 |   """
 13 |   @spec sum([number]) :: number
 14 |   def sum(list) when is_list(list) do
 15 |     # fold from the head so the addition order matches a plain left-to-right loop
 16 |     Enum.reduce(list, 0, fn item, total -> total + item end)
 17 |   end
 18 | 
 19 |   @doc """
 20 |   Calculate the mean from a list of numbers
 21 | 
 22 |   ## Examples
 23 | 
 24 |       iex> Statistics.mean([])
 25 |       nil
 26 |       iex> Statistics.mean([1,2,3])
 27 |       2.0
 28 | 
 29 |   """
 30 |   @spec mean([number]) :: float() | nil
 31 |   def mean(list) when is_list(list), do: do_mean(list, 0, 0)
 32 | 
 33 |   defp do_mean([], 0, 0), do: nil
 34 |   defp do_mean([], t, l), do: t / l
 35 | 
 36 |   defp do_mean([x | xs], t, l) do
 37 |     do_mean(xs, t + x, l + 1)
 38 |   end
 39 | 
 40 |   @doc """
 41 |   Get the median value from a list.
 42 | 
 43 |   The list is sorted first. An odd-length list yields its middle element,
 44 |   an even-length list the mean of its two middle elements (a float), and
 45 |   an empty list `nil`.
 46 | 
 47 |   ## Examples
 48 | 
 49 |       iex> Statistics.median([])
 50 |       nil
 51 |       iex> Statistics.median([1,2,3])
 52 |       2
 53 |       iex> Statistics.median([1,2,3,4])
 54 |       2.5
 55 | 
 56 |   """
 57 |   @spec median([number]) :: number | nil
 58 |   def median([]), do: nil
 59 | 
 60 |   def median(list) when is_list(list) do
 61 |     sorted = Enum.sort(list)
 62 |     count = length(sorted)
 63 |     # number of elements that sit below the midpoint
 64 |     half = div(count, 2)
 65 | 
 66 |     if rem(count, 2) == 1 do
 67 |       # odd count: `half` indexes the single middle element
 68 |       Enum.at(sorted, half)
 69 |     else
 70 |       # even count: average the elements on either side of the midpoint
 71 |       upper = Enum.at(sorted, half)
 72 |       lower = Enum.at(sorted, half - 1)
 73 | 
 74 |       mean([upper, lower])
 75 |     end
 76 |   end
 77 | 
 78 |   @doc """
 79 |   Get the most frequently occurring value
 80 | 
 81 |   ## Examples
 82 | 
 83 |       iex> Statistics.mode([])
 84 |       nil
 85 |       iex> Statistics.mode([1,2,3,2,4,5,2,6,7,2,8,9])
 86 |       2
 87 | 
 88 |   """
 89 |   @spec mode([number]) :: number | nil
 90 |   def mode([]), do: nil
 91 | 
 92 |   def mode(list) when is_list(list) do
 93 |     h = hist(list)
 94 |     max = Map.values(h) |> Enum.max()
 95 |     h |> Enum.find(fn {_, val} -> val == max end) |> elem(0)
 96 |   end
 97 | 
 98 |   @doc """
 99 |   Get a frequency count of the values in a list
100 | 
101 |   ## Examples
102 | 
103 |       iex> Statistics.hist([])
104 |       nil
105 |       iex> Statistics.hist([1,2,3,2,4,5,2,5,1,2,5,5])
106 |       %{1 => 2, 2 => 4, 3 => 1, 4 => 1, 5 => 4}
107 | 
108 |   """
109 |   @spec hist([number]) :: map | nil
110 |   def hist([]), do: nil
111 | 
112 |   def hist(list) when is_list(list) do
113 |     list
114 |     |> Enum.reduce(%{}, fn tag, acc -> Map.update(acc, tag, 1, &(&1 + 1)) end)
115 |   end
116 | 
117 |   @doc """
118 |   Get the minimum value from a list
119 | 
120 |       iex> Statistics.min([])
121 |       nil
122 |       iex> Statistics.min([1,2,3])
123 |       1
124 | 
125 |   If a non-empty list is provided, it is a call to Enum.min/1
126 |   """
127 |   @spec min([number]) :: number | nil
128 |   def min([]), do: nil
129 | 
130 |   def min(list) do
131 |     Enum.min(list)
132 |   end
133 | 
134 |   @doc """
135 |   Get the maximum value from a list
136 | 
137 |       iex> Statistics.max([])
138 |       nil
139 |       iex> Statistics.max([1,2,3])
140 |       3
141 | 
142 |   If a non-empty list is provided, it is a call to Enum.max/1
143 |   """
144 |   @spec max([number]) :: number | nil
145 |   def max([]), do: nil
146 | 
147 |   def max(list) do
148 |     Enum.max(list)
149 |   end
150 | 
151 |   @doc """
152 |   Get the quartile cutoff value from a list
153 | 
154 |   responds to only first and third quartile.
155 | 
156 |   ## Examples
157 | 
158 |       iex>  Statistics.quartile([1,2,3,4,5,6,7,8,9],:first)
159 |       3
160 |       iex>  Statistics.quartile([1,2,3,4,5,6,7,8,9],:third)
161 |       7
162 | 
163 |   """
164 |   # TODO change these to call `percentile/2`
165 |   @spec quartile([number], :first | :third) :: number
166 |   def quartile(list, :first) do
167 |     list |> split |> elem(0) |> median
168 |   end
169 | 
170 |   def quartile(list, :third) do
171 |     list |> split |> elem(1) |> median
172 |   end
173 | 
174 |   @doc """
175 |   Get the nth percentile cutoff from a list
176 | 
177 |   Interpolates linearly between neighbouring sorted values; expects 0 <= n <= 100.
178 | 
179 |   ## Examples
180 | 
181 |       iex> Statistics.percentile([], 50)
182 |       nil
183 |       iex> Statistics.percentile([1], 50)
184 |       1
185 |       iex> Statistics.percentile([1,2,3,4,5,6,7,8,9],80)
186 |       7.4
187 |       iex> Statistics.percentile([1,2,3,4,5,6,7,8,9],100)
188 |       9
189 |   """
190 |   @spec percentile([number], number) :: number | nil
191 |   def percentile([], _), do: nil
192 |   def percentile([x], _), do: x
193 |   def percentile(list, 0), do: min(list)
194 |   def percentile(list, 100), do: max(list)
195 |   def percentile(list, n) when is_list(list) and is_number(n) do
196 |     s = Enum.sort(list)
197 |     r = n / 100.0 * (length(list) - 1)
198 |     f = :erlang.trunc(r)
199 |     lower = Enum.at(s, f)
200 |     # n == 100.0 puts `lower` on the last element; default to it so no nil is subtracted
201 |     lower + (Enum.at(s, f + 1, lower) - lower) * (r - f)
202 |   end
203 | 
204 |   @doc """
205 |   Get range of data
206 | 
207 |   ## Examples
208 | 
209 |       iex> Statistics.range([1,2,3,4,5,6])
210 |       5
211 | 
212 |   """
213 |   @spec range([number]) :: number | nil
214 |   def range([]), do: nil
215 | 
216 |   def range(list) when is_list(list) do
217 |     max(list) - min(list)
218 |   end
219 | 
220 |   @doc """
221 |   Calculate the inter-quartile range
222 | 
223 |   ## Examples
224 | 
225 |       iex> Statistics.iqr([])
226 |       nil
227 |       iex> Statistics.iqr([1,2,3,4,5,6,7,8,9])
228 |       4
229 | 
230 |   """
231 |   @spec iqr([number]) :: number | nil
232 |   def iqr([]), do: nil
233 | 
234 |   def iqr(list) when is_list(list) do
235 |     {first, second} = split(list)
236 |     median(second) - median(first)
237 |   end
238 | 
239 |   @doc """
240 |   Calculate variance from a list of numbers
241 | 
242 |   ## Examples
243 | 
244 |       iex> Statistics.variance([])
245 |       nil
246 |       iex> Statistics.variance([1,2,3,4])
247 |       1.25
248 |       iex> Statistics.variance([55,56,60,65,54,51,39])
249 |       56.48979591836735
250 | 
251 |   """
252 |   @spec variance([number]) :: number | nil
253 |   def variance([]), do: nil
254 | 
255 |   def variance(list) when is_list(list) do
256 |     list_mean = mean(list)
257 |     list |> Enum.map(fn x -> (list_mean - x) * (list_mean - x) end) |> mean
258 |   end
259 | 
260 |   @doc """
261 |   Calculate the standard deviation of a list
262 | 
263 |   ## Examples
264 | 
265 |       iex> Statistics.stdev([])
266 |       nil
267 |       iex> Statistics.stdev([1,2])
268 |       0.5
269 | 
270 |   """
271 |   @spec stdev([number]) :: number | nil
272 |   def stdev([]), do: nil
273 | 
274 |   def stdev(list) do
275 |     list |> variance |> Math.sqrt()
276 |   end
277 | 
278 |   @doc """
279 |   Calculate the trimmed mean of a list.
280 | 
281 |   Can specify cutoff values as a tuple, or simply choose the IQR min/max as the cutoffs
282 | 
283 |   ## Examples
284 | 
285 |       iex> Statistics.trimmed_mean([], :iqr)
286 |       nil
287 |       iex> Statistics.trimmed_mean([1,2,3], {1,3})
288 |       2.0
289 |       iex> Statistics.trimmed_mean([1,2,3,4,5,5,6,6,7,7,8,8,10,11,12,13,14,15], :iqr)
290 |       7.3
291 | 
292 |   """
293 |   @spec trimmed_mean([number], atom | tuple) :: number | nil
294 |   def trimmed_mean([], _), do: nil
295 | 
296 |   def trimmed_mean(list, :iqr) do
297 |     {first, second} = split(list)
298 |     trimmed_mean(list, {median(first), median(second)})
299 |   end
300 | 
301 |   def trimmed_mean(list, {low, high}) do
302 |     list |> Enum.reject(fn x -> x < low or x > high end) |> mean
303 |   end
304 | 
305 |   @doc """
306 |   Calculates the harmonic mean from a list
307 | 
308 |   Harmonic mean is the number of values divided by
309 |   the sum of the reciprocal of all the values.
310 | 
311 |   ## Examples
312 | 
313 |       iex> Statistics.harmonic_mean([])
314 |       nil
315 |       iex> Statistics.harmonic_mean([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15])
316 |       4.5204836768674568
317 | 
318 |   """
319 |   @spec harmonic_mean([number]) :: number | nil
320 |   def harmonic_mean([]), do: nil
321 | 
322 |   def harmonic_mean(list) when is_list(list) do
323 |     do_harmonic_mean(list, 0, 0)
324 |   end
325 | 
326 |   defp do_harmonic_mean([], t, l), do: l / t
327 | 
328 |   defp do_harmonic_mean([x | xs], t, l) do
329 |     do_harmonic_mean(xs, t + 1 / x, l + 1)
330 |   end
331 | 
332 |   @doc """
333 |   Calculate the geometric mean of a list
334 | 
335 |   Geometric mean is the nth root of the product of n values
336 | 
337 |   ## Examples
338 | 
339 |       iex> Statistics.geometric_mean([])
340 |       nil
341 |       iex> Statistics.geometric_mean([1,2,3])
342 |       1.8171205928321397
343 | 
344 |   """
345 |   @spec geometric_mean([number]) :: number | nil
346 |   def geometric_mean([]), do: nil
347 | 
348 |   def geometric_mean(list) when is_list(list) do
349 |     do_geometric_mean(list, 1, 0)
350 |   end
351 | 
352 |   defp do_geometric_mean([], p, l), do: Math.pow(p, 1 / l)
353 | 
354 |   defp do_geometric_mean([x | xs], p, l) do
355 |     do_geometric_mean(xs, p * x, l + 1)
356 |   end
357 | 
358 |   @doc """
359 |   Calculates the nth moment about the mean for a sample.
360 | 
361 |   Generally used to calculate coefficients of skewness and  kurtosis.
362 |   Returns the n-th central moment as a float
363 |   The denominator for the moment calculation is the number of
364 |   observations, no degrees of freedom correction is done.
365 | 
366 |   ## Examples
367 | 
368 |       iex> Statistics.moment([1,2,3,4,5,6,7,8,9,8,7,6,5,4,3],3)
369 |       -1.3440000000000025
370 |       iex> Statistics.moment([], 2)
371 |       nil
372 | 
373 |   """
374 |   @spec moment([number], pos_integer) :: number | nil
375 |   def moment(list, n \\ 1)
376 |   # empty list has no moment
377 |   def moment([], _), do: nil
378 |   # By definition the first moment about the mean is 0.
379 |   def moment(_, 1), do: 0.0
380 |   # Otherwise
381 |   def moment(list, n) when is_list(list) and is_number(n) do
382 |     lmean = mean(list)
383 |     list |> Enum.map(&Math.pow(&1 - lmean, n)) |> mean
384 |   end
385 | 
386 |   @doc """
387 |   Computes the skewness of a data set.
388 | 
389 |   For normally distributed data, the skewness should be about 0. A skewness
390 |   value > 0 means that there is more weight in the right tail of the
391 |   distribution.
392 | 
393 |   ## Examples
394 | 
395 |       iex> Statistics.skew([])
396 |       nil
397 |       iex> Statistics.skew([1,2,3,2,1])
398 |       0.3436215967445454
399 | 
400 |   """
401 |   @spec skew([number]) :: number | nil
402 |   def skew([]), do: nil
403 | 
404 |   def skew(list) do
405 |     m2 = moment(list, 2)
406 |     m3 = moment(list, 3)
407 |     m3 / Math.pow(m2, 1.5)
408 |   end
409 | 
410 |   @doc """
411 |   Computes the kurtosis (Fisher) of a list.
412 | 
413 |   Kurtosis is the fourth central moment divided by the square of the variance.
414 | 
415 |   ## Examples
416 | 
417 |       iex> Statistics.kurtosis([])
418 |       nil
419 |       iex> Statistics.kurtosis([1,2,3,2,1])
420 |       -1.1530612244897964
421 | 
422 |   """
423 |   @spec kurtosis([number]) :: number | nil
424 |   def kurtosis([]), do: nil
425 | 
426 |   def kurtosis(list) do
427 |     m2 = moment(list, 2)
428 |     m4 = moment(list, 4)
429 |     # pearson
430 |     p = m4 / Math.pow(m2, 2.0)
431 |     # fisher
432 |     p - 3
433 |   end
434 | 
435 |   @doc """
436 |   Calculate a standard `z` score for each item in a list
437 | 
438 |   ## Examples
439 | 
440 |       iex> Statistics.zscore([3,2,3,4,5,6,5,4,3])
441 |       [-0.7427813527082074, -1.5784103745049407, -0.7427813527082074,
442 |       0.09284766908852597, 0.9284766908852594, 1.7641057126819928,
443 |       0.9284766908852594, 0.09284766908852597, -0.7427813527082074]
444 | 
445 |   """
446 |   @spec zscore([number]) :: list | nil
447 |   def zscore(list) when is_list(list) do
448 |     lmean = mean(list)
449 |     lstdev = stdev(list)
450 |     for n <- list, do: (n - lmean) / lstdev
451 |   end
452 | 
453 |   @doc """
454 |   Calculate the Pearson product-moment correlation coefficient of two lists.
455 | 
456 |   The two lists are presumed to represent matched pairs of observations, the `x` and `y` of a simple regression.
457 | 
458 |   ## Examples
459 | 
460 |       iex> Statistics.correlation([1,2,3,4], [1,3,5,6])
461 |       0.9897782665572894
462 | 
463 |   """
464 |   @spec correlation([number], [number]) :: number
465 |   def correlation(x, y) when length(x) == length(y) do
466 |     xmean = mean(x)
467 |     ymean = mean(y)
468 | 
469 |     numer =
470 |       Enum.zip(x, y)
471 |       |> Enum.map(fn {xi, yi} -> (xi - xmean) * (yi - ymean) end)
472 |       |> sum
473 | 
474 |     denom_x =
475 |       x
476 |       |> Enum.map(fn xi -> (xi - xmean) * (xi - xmean) end)
477 |       |> sum
478 | 
479 |     denom_y =
480 |       y
481 |       |> Enum.map(fn yi -> (yi - ymean) * (yi - ymean) end)
482 |       |> sum
483 | 
484 |     numer / Math.sqrt(denom_x * denom_y)
485 |   end
486 | 
487 |   @doc """
488 |   Calculate the covariance of two lists.
489 | 
490 |   Covariance is a measure of how much two random variables change together.
491 |   The two lists are presumed to represent matched pairs of observations, such as the `x` and `y` of a simple regression.
492 | 
493 |   ## Examples
494 | 
495 |       iex> Statistics.covariance([1,2,3,2,1], [1,4,5.2,7,99])
496 |       -17.89
497 | 
498 |   """
499 |   @spec covariance([number], [number]) :: number
500 |   def covariance(x, y) when length(x) == length(y) do
501 |     xmean = mean(x)
502 |     ymean = mean(y)
503 |     size = length(x)
504 | 
505 |     Enum.zip(x, y)
506 |     |> Enum.map(fn {xi, yi} -> (xi - xmean) * (yi - ymean) end)
507 |     |> Enum.map(fn i -> i / (size - 1) end)
508 |     |> sum
509 |   end
510 | 
511 |   ## helpers and other flotsam
512 | 
513 |   import Integer, only: [is_even: 1, is_odd: 1]
514 | 
515 |   # Split a list into two equal lists.
516 |   # Needed for getting the quartiles.
517 |   defp split(list) when is_list(list) do
518 |     do_split(Enum.sort(list), length(list))
519 |   end
520 | 
521 |   defp do_split(sorted_list, l) when is_even(l) do
522 |     m = :erlang.trunc(l / 2)
523 |     {Enum.take(sorted_list, m), Enum.drop(sorted_list, m)}
524 |   end
525 | 
526 |   defp do_split(sorted_list, l) when is_odd(l) do
527 |     m = :erlang.trunc((l + 1) / 2)
528 |     {Enum.take(sorted_list, m), Enum.drop(sorted_list, m - 1)}
529 |   end
530 | end
531 | 


--------------------------------------------------------------------------------
/lib/statistics/distributions/beta.ex:
--------------------------------------------------------------------------------
  1 | defmodule Statistics.Distributions.Beta do
  2 |   alias Statistics.Math
  3 |   alias Statistics.Math.Functions
  4 | 
  5 |   @moduledoc """
  6 |   The Beta distribution
  7 |   """
  8 | 
  @doc """
  The probability density function

  Returns a function of `x` giving the density of a Beta distribution with
  shape parameters `a` and `b`. The returned function yields `0.0` for
  `x <= 0` and for `x > 1`, where the distribution has no mass.

  ## Examples

      iex> Statistics.Distributions.Beta.pdf(1,100).(0.1)
      0.0029512665430652825

  """
  @spec pdf(number, number) :: fun
  def pdf(a, b) do
    # the normalising constant does not depend on x, so it is computed once
    bab = Functions.beta(a, b)

    fn x ->
      cond do
        x <= 0.0 ->
          0.0

        # above 1 the formula below would raise the negative number `1 - x`
        # to the power `b - 1`, which is not a density
        x > 1.0 ->
          0.0

        true ->
          Math.pow(x, a - 1) * Math.pow(1 - x, b - 1) / bab
      end
    end
  end
 32 | 
 33 |   @doc """
 34 |   The cumulative density function
 35 | 
 36 |   ## Examples
 37 | 
 38 |       iex> Statistics.Distributions.Beta.cdf(1,100).(0.1)
 39 |       0.9996401052677814
 40 |       
 41 |   """
 42 |   @spec cdf(number, number) :: fun
 43 |   def cdf(a, b) do
 44 |     fn x ->
 45 |       Functions.simpson(pdf(a, b), 0, x, 10000)
 46 |     end
 47 |   end
 48 | 
 49 |   @doc """
 50 |   The percentile-point function
 51 | 
 52 |   ## Examples
 53 | 
 54 |       iex> Statistics.Distributions.Beta.ppf(1,100).(0.1)
 55 |       0.001053089271799999
 56 |       
 57 |   """
 58 |   @spec ppf(number, number) :: fun
 59 |   def ppf(a, b) do
 60 |     fn x ->
 61 |       ppf_tande(cdf(a, b), x)
 62 |     end
 63 |   end
 64 | 
 65 |   defp ppf_tande(cdf, x) do
 66 |     ppf_tande(cdf, x, 0.0, 14, 0)
 67 |   end
 68 | 
 69 |   defp ppf_tande(_, _, guess, precision, precision) do
 70 |     guess
 71 |   end
 72 | 
 73 |   defp ppf_tande(cdf, x, guess, precision, current_precision) do
 74 |     # add 1/10**precision'th of the max value to the min
 75 |     new_guess = guess + 1 / Math.pow(10, current_precision)
 76 |     # if it's less than the PPF we want, do it again
 77 |     if cdf.(new_guess) < x do
 78 |       ppf_tande(cdf, x, new_guess, precision, current_precision)
 79 |     else
 80 |       # otherwise (it's greater), increase the current_precision
 81 |       # and recurse with original guess
 82 |       ppf_tande(cdf, x, guess, precision, current_precision + 1)
 83 |     end
 84 |   end
 85 | 
 86 |   @doc """
 87 |   Draw a random number from a Beta distribution
 88 | 
 89 |   ## Examples
 90 | 
 91 |       iex> Statistics.Distributions.Beta.rand(1,100)
 92 |       0.005922672626035741
 93 | 
 94 |   """
 95 |   @spec rand(number, number) :: number
 96 |   def rand(a, b), do: rand(pdf(a, b))
 97 | 
 98 |   defp rand(rpdf) do
 99 |     # beta only exists between 0 and 1
100 |     x = Math.rand()
101 | 
102 |     if rpdf.(x) > Math.rand() do
103 |       x
104 |     else
105 |       # keep trying
106 |       rand(rpdf)
107 |     end
108 |   end
109 | end
110 | 


--------------------------------------------------------------------------------
/lib/statistics/distributions/binomial.ex:
--------------------------------------------------------------------------------
  1 | defmodule Statistics.Distributions.Binomial do
  2 |   alias Statistics.Math
  3 | 
  4 |   @moduledoc """
  5 |   Binomial distribution.
  6 | 
  7 |   This models the expected outcome of a number
  8 |   of binary trials, each with known probability,
  9 |   (often called a Bernoulli trial)
 10 |   """
 11 | 
 12 |   @doc """
 13 |   The probability mass function.
 14 | 
 15 |   Note that calling the mass function with a `Float` will return `nil` because
 16 |   this is a discrete probability distribution which only includes integer values.
 17 | 
 18 |   ## Examples
 19 | 
 20 |       iex> Statistics.Distributions.Binomial.pmf(4, 0.5).(2)
 21 |       0.375
 22 |       iex> Statistics.Distributions.Binomial.pmf(4, 0.5).(0.2)
 23 |       nil
 24 | 
 25 |   """
 26 |   @spec pmf(non_neg_integer, number) :: fun
 27 |   def pmf(n, p) do
 28 |     fn k ->
 29 |       cond do
 30 |         k < 0.0 ->
 31 |           0.0
 32 | 
 33 |         n < k ->
 34 |           0.0
 35 | 
 36 |         k != Math.to_int(k) ->
 37 |           nil
 38 | 
 39 |         true ->
 40 |           Math.combination(n, k) * Math.pow(p, k) * Math.pow(1 - p, n - k)
 41 |       end
 42 |     end
 43 |   end
 44 | 
 45 |   @doc """
 46 |   The cumulative density function
 47 | 
 48 |   ## Examples
 49 | 
 50 |       iex> Statistics.Distributions.Binomial.cdf(4, 0.5).(2)
 51 |       0.6875
 52 | 
 53 |   """
 54 |   @spec cdf(non_neg_integer, number) :: fun
 55 |   def cdf(n, p) do
 56 |     fn k ->
 57 |       0..Math.to_int(Math.floor(k))
 58 |       |> Enum.to_list()
 59 |       |> Enum.map(fn i -> Math.combination(n, i) * Math.pow(p, i) * Math.pow(1 - p, n - i) end)
 60 |       |> Enum.sum()
 61 |     end
 62 |   end
 63 | 
 64 |   @doc """
 65 |   The percentile-point function
 66 | 
 67 |   ## Examples
 68 | 
 69 |       iex> Statistics.Distributions.Binomial.ppf(10, 0.5).(0.5)
 70 |       5
 71 | 
 72 |   """
 73 |   @spec ppf(non_neg_integer, number) :: fun
 74 |   def ppf(n, p) do
 75 |     fn x ->
 76 |       ppf_tande(x, n, p, cdf(n, p), 0)
 77 |     end
 78 |   end
 79 | 
 80 |   # trial-and-error method which refines guesses
 81 |   # to arbitrary number of decimal places
 82 |   defp ppf_tande(x, n, p, npcdf, g) do
 83 |     g_cdf = npcdf.(g)
 84 | 
 85 |     cond do
 86 |       x > g_cdf ->
 87 |         ppf_tande(x, n, p, npcdf, g + 1)
 88 | 
 89 |       x <= g_cdf ->
 90 |         g
 91 |     end
 92 |   end
 93 | 
 94 |   @doc """
 95 |   Draw a random number from a binomial distribution
 96 | 
 97 |   Uses the [rejection sampling method](https://en.wikipedia.org/wiki/Rejection_sampling)
 98 |   and returns a rounded `Float`.
 99 | 
100 |   ## Examples
101 | 
102 |       iex> Statistics.Distributions.Binomial.rand(10, 0.5)
103 |       5.0
104 | 
105 |   """
106 |   @spec rand(non_neg_integer, number) :: non_neg_integer
107 |   def rand(n, p), do: rand(n, p, pmf(n, p))
108 | 
109 |   defp rand(n, p, rpmf) do
110 |     x = Math.rand() * n
111 | 
112 |     if rpmf.(x) > Math.rand() do
113 |       Float.round(x)
114 |     else
115 |       # keep trying
116 |       rand(n, p, rpmf)
117 |     end
118 |   end
119 | end
120 | 


--------------------------------------------------------------------------------
/lib/statistics/distributions/chisq.ex:
--------------------------------------------------------------------------------
  1 | defmodule Statistics.Distributions.Chisq do
  2 |   alias Statistics.Math
  3 |   alias Statistics.Math.Functions
  4 | 
  5 |   @moduledoc """
  6 |   Chi square distribution.
  7 | 
  8 |   Takes a *degrees of freedom* parameter.
  9 |   """
 10 | 
 11 |   @doc """
 12 |   The probability density function
 13 | 
 14 |   ## Examples
 15 | 
 16 |       iex> Statistics.Distributions.Chisq.pdf(1).(2)
 17 |       0.10377687435514868
 18 | 
 19 |   """
 20 |   @spec pdf(non_neg_integer) :: fun
 21 |   def pdf(df) do
 22 |     hdf = df / 2
 23 |     g = Math.pow(2, hdf) * Functions.gamma(hdf)
 24 | 
 25 |     fn x -> 1 / g * Math.pow(x, hdf - 1) * Math.exp(-1 * x / 2) end
 26 |   end
 27 | 
 28 |   @doc """
 29 |   The cumulative density function
 30 | 
 31 |   ## Examples
 32 | 
 33 |       iex> Statistics.Distributions.Chisq.cdf(2).(2)
 34 |       0.6321205588285578
 35 | 
 36 |   """
 37 |   @spec cdf(non_neg_integer) :: fun
 38 |   def cdf(df) do
 39 |     hdf = df / 2.0
 40 |     g = Functions.gamma(hdf)
 41 | 
 42 |     fn x ->
 43 |       b = Functions.gammainc(hdf, x / 2.0)
 44 |       b / g
 45 |     end
 46 |   end
 47 | 
 48 |   @doc """
 49 |   The percentile-point function
 50 | 
 51 |   ## Examples
 52 | 
 53 |       iex> Statistics.Distributions.Chisq.ppf(1).(0.95)
 54 |       3.841458820694101
 55 | 
 56 |   """
 57 |   @spec ppf(non_neg_integer) :: fun
 58 |   def ppf(df) do
 59 |     fn x ->
 60 |       ppf_tande(x, cdf(df))
 61 |     end
 62 |   end
 63 | 
 64 |   # trial-and-error method which refines guesses
 65 |   # to arbitrary number of decimal places
 66 |   defp ppf_tande(x, tcdf, precision \\ 14) do
 67 |     ppf_tande(x, tcdf, 0, precision + 2, 0)
 68 |   end
 69 | 
 70 |   defp ppf_tande(_, _, g, precision, precision) do
 71 |     g
 72 |   end
 73 | 
 74 |   defp ppf_tande(x, tcdf, g, precision, p) do
 75 |     increment = 100 / Math.pow(10, p)
 76 |     guess = g + increment
 77 | 
 78 |     if x < tcdf.(guess) do
 79 |       ppf_tande(x, tcdf, g, precision, p + 1)
 80 |     else
 81 |       ppf_tande(x, tcdf, guess, precision, p)
 82 |     end
 83 |   end
 84 | 
 85 |   @doc """
 86 |   Draw a random number from a t distribution with specified degrees of freedom
 87 | 
 88 |   Uses the [rejection sampling method](https://en.wikipedia.org/wiki/Rejection_sampling)
 89 | 
 90 |   ## Examples
 91 | 
 92 |       iex> Statistics.Distributions.Chisq.rand(2)
 93 |       1.232433646523534767
 94 | 
 95 |   """
 96 |   @spec rand(non_neg_integer) :: number
 97 |   def rand(df), do: rand(df, cdf(df))
 98 | 
 99 |   defp rand(df, rcdf) do
100 |     x = Math.rand() * 100
101 | 
102 |     if rcdf.(x) > Math.rand() do
103 |       x
104 |     else
105 |       # keep trying
106 |       rand(df, rcdf)
107 |     end
108 |   end
109 | end
110 | 


--------------------------------------------------------------------------------
/lib/statistics/distributions/exponential.ex:
--------------------------------------------------------------------------------
  1 | defmodule Statistics.Distributions.Exponential do
  2 |   @moduledoc """
  3 |   Exponential distribution.
  4 | 
  5 |   `lambda` is the rate parameter and must be greater than zero.
  6 |   """
  7 | 
  8 |   alias Statistics.Math
  9 | 
 10 |   @doc """
 11 |   The probability density function
 12 | 
 13 |   ## Examples
 14 | 
 15 |       iex> Statistics.Distributions.Exponential.pdf().(1)
 16 |       0.36787944117144233
 17 | 
 18 |   """
 19 |   @spec pdf() :: fun
 20 |   @spec pdf(number) :: fun
 21 |   def pdf() do
 22 |     pdf(1)
 23 |   end
 24 | 
 25 |   def pdf(lambda) do
 26 |     fn x ->
 27 |       cond do
 28 |         x < 0 ->
 29 |           0
 30 | 
 31 |         lambda <= 0 ->
 32 |           :nan
 33 | 
 34 |         true ->
 35 |           lambda * Math.exp(-lambda * x)
 36 |       end
 37 |     end
 38 |   end
 39 | 
 40 |   @doc """
 41 |   The cumulative density function
 42 | 
 43 |   ## Examples
 44 | 
 45 |       iex> Statistics.Distributions.Exponential.cdf().(1)
 46 |       0.6321205588285577
 47 | 
 48 |   """
 49 |   @spec cdf() :: fun
 50 |   @spec cdf(number) :: fun
 51 |   def cdf() do
 52 |     cdf(1)
 53 |   end
 54 | 
 55 |   def cdf(lambda) do
 56 |     fn x ->
 57 |       cond do
 58 |         x < 0 ->
 59 |           0
 60 | 
 61 |         lambda <= 0 ->
 62 |           :nan
 63 | 
 64 |         true ->
 65 |           1 - Math.exp(-lambda * x)
 66 |       end
 67 |     end
 68 |   end
 69 | 
 70 |   @doc """
 71 |   The percentile-point function
 72 | 
 73 |   ## Examples
 74 | 
 75 |       iex> Statistics.Distributions.Exponential.ppf().(0.1)
 76 |       0.10536051565782628
 77 | 
 78 |   """
 79 |   @spec ppf() :: fun
 80 |   @spec ppf(number) :: fun
 81 |   def ppf() do
 82 |     ppf(1)
 83 |   end
 84 | 
 85 |   def ppf(lambda) do
 86 |     fn x ->
 87 |       cond do
 88 |         x == 1 ->
 89 |           :inf
 90 | 
 91 |         x < 0 or x > 1 or lambda < 0 ->
 92 |           :nan
 93 | 
 94 |         true ->
 95 |           -1 * Math.ln(1 - x) / lambda
 96 |       end
 97 |     end
 98 |   end
 99 | 
100 |   @doc """
101 |   Draw a random variate from the distribution with specified lambda
102 | 
103 |   Uses the closed-form inverse CDF (PPF) evaluated with uniform number between 0.0 and 1.0
104 | 
105 |   ## Examples
106 | 
107 |       iex> Statistics.Distributions.Exponential.rand()
108 |       0.145709384787
109 | 
110 |   """
111 |   @spec rand() :: number
112 |   @spec rand(number) :: number
113 |   def rand() do
114 |     rand(1)
115 |   end
116 | 
117 |   def rand(lambda) do
118 |     ppf(lambda).(Math.rand())
119 |   end
120 | end
121 | 


--------------------------------------------------------------------------------
/lib/statistics/distributions/f.ex:
--------------------------------------------------------------------------------
  1 | defmodule Statistics.Distributions.F do
  2 |   alias Statistics.Math
  3 |   alias Statistics.Math.Functions
  4 |   alias Statistics.Distributions.Beta
  5 | 
  6 |   @moduledoc """
  7 |   The F distribution
  8 | 
  9 |   Note that `ppf/2` and `rand/2` here are very slow.
 10 |   """
 11 | 
 12 |   @doc """
 13 |   The probability density function
 14 | 
 15 |   ## Examples
 16 | 
 17 |       iex> Statistics.Distributions.F.pdf(1,1).(1)
 18 |       0.15915494309189537
 19 | 
 20 |   """
 21 |   @spec pdf(number, number) :: fun
 22 |   def pdf(d1, d2) do
 23 |     powa = Math.pow(d2, d2)
 24 |     cfac = Functions.beta(d1 / 2, d2 / 2)
 25 | 
 26 |     fn x ->
 27 |       # create components
 28 |       a = Math.pow(d1 * x, d1) * powa
 29 |       b = Math.pow(d1 * x + d2, d1 + d2)
 30 |       c = x * cfac
 31 |       # for the equation
 32 |       Math.sqrt(a / b) / c
 33 |     end
 34 |   end
 35 | 
 36 |   @doc """
 37 |   The cumulative density function
 38 | 
 39 |   ## Examples
 40 | 
 41 |       iex> Statistics.Distributions.F.cdf(1,1).(1)
 42 |       0.4971668763845647
 43 |       
 44 |   NOTE this is rather imprecise owing to the use
 45 |   of numerical integration of `Beta.pdf/2` to 
 46 |   approximate the regularised incomplete beta function
 47 |   """
 48 |   # NOTE the cdf is defined in terms of 
 49 |   # the regularised incomplete Beta function
 50 |   # which is the CDF of the Beta distribution
 51 |   @spec cdf(number, number) :: fun
 52 |   def cdf(d1, d2) do
 53 |     bcdf = Beta.cdf(d1 / 2, d2 / 2)
 54 | 
 55 |     fn x ->
 56 |       xx = d1 * x / (d1 * x + d2)
 57 |       bcdf.(xx)
 58 |     end
 59 |   end
 60 | 
 61 |   @doc """
 62 |   The percentile-point function
 63 | 
 64 |   ## Examples
 65 | 
 66 |       iex> Statistics.Distributions.F.ppf(1,1).(1)
 67 |       1.0180414899099999
 68 |       
 69 |   """
 70 |   @spec ppf(number, number) :: fun
 71 |   def ppf(d1, d2) do
 72 |     fn x ->
 73 |       ppf_tande(cdf(d1, d2), x)
 74 |     end
 75 |   end
 76 | 
 77 |   # trial-and-error method which refines guesses
 78 |   defp ppf_tande(cdf, x) do
 79 |     ppf_tande(cdf, x, 0.0, 14, 0)
 80 |   end
 81 | 
 82 |   defp ppf_tande(_, _, guess, precision, current_precision) when current_precision >= precision do
 83 |     guess
 84 |   end
 85 | 
 86 |   defp ppf_tande(cdf, x, guess, precision, current_precision) do
 87 |     # add 1/10**precision'th of the max value to the min
 88 |     new_guess = guess + 100_000 / Math.pow(10, current_precision)
 89 |     cg = cdf.(new_guess)
 90 |     # if it's less than the PPF we want, do it again
 91 |     if cg < x do
 92 |       ppf_tande(cdf, x, new_guess, precision, current_precision + 0.1)
 93 |     else
 94 |       # otherwise (it's greater), increase the current_precision
 95 |       # and recurse with original guess
 96 |       ppf_tande(cdf, x, guess, precision, current_precision + 1)
 97 |     end
 98 |   end
 99 | 
100 |   @doc """
101 |   Draw a random number from an F distribution 
102 |   """
103 |   @spec rand(number, number) :: number
104 |   def rand(d1, d2) do
105 |     ceil = ppf(d1, d2).(0.999)
106 |     do_rand(pdf(d1, d2), ceil)
107 |   end
108 | 
109 |   defp do_rand(pdf, ceil) do
110 |     x = Math.rand() * ceil
111 | 
112 |     if pdf.(x) > Math.rand() do
113 |       x
114 |     else
115 |       # keep trying
116 |       do_rand(pdf, ceil)
117 |     end
118 |   end
119 | end
120 | 


--------------------------------------------------------------------------------
/lib/statistics/distributions/hypergeometric.ex:
--------------------------------------------------------------------------------
  1 | defmodule Statistics.Distributions.Hypergeometric do
  2 |   @moduledoc """
  3 |   Hypergeometric distribution.
  4 | 
  5 |   It models the probability that an n numbers of trials
  6 |   result in exactly k successes, with a population of pn items,
  7 |   where pk are considered as successes.
  8 |   """
  9 | 
 10 |   alias Statistics.Math
 11 | 
 12 |   @doc """
 13 |   The probability mass function
 14 |   ## Examples
 15 |       iex> Statistics.Distributions.Hypergeometric.pmf(50, 5, 10).(4)
 16 |       0.003964583058015066
 17 |   """
 18 |   @spec pmf(non_neg_integer, non_neg_integer, non_neg_integer) :: fun
 19 |   def pmf(pn, pk, n) do
 20 |     combos = Math.combination(pn, n)
 21 | 
 22 |     fn k ->
 23 |       cond do
 24 |         n < k ->
 25 |           0.0
 26 | 
 27 |         pn < n ->
 28 |           0.0
 29 | 
 30 |         pn == pk && n != k ->
 31 |           0.0
 32 | 
 33 |         pn == pk ->
 34 |           1.0
 35 | 
 36 |         true ->
 37 |           xk = Math.to_int(k)
 38 |           Math.combination(pk, xk) * Math.combination(pn - pk, n - xk) / combos
 39 |       end
 40 |     end
 41 |   end
 42 | 
 43 |   @doc """
 44 |   The cumulative density function
 45 |   ## Examples
 46 |       iex> Statistics.Distributions.Hypergeometric.cdf(52, 5, 13).(2)
 47 |       0.9072328931572629
 48 |   """
 49 |   @spec cdf(non_neg_integer, non_neg_integer, non_neg_integer) :: fun
 50 |   def cdf(pn, pk, n) do
 51 |     cpmf = pmf(pn, pk, n)
 52 | 
 53 |     fn k ->
 54 |       0..Math.to_int(Math.floor(k))
 55 |       |> Enum.to_list()
 56 |       |> Enum.map(fn i -> cpmf.(i) end)
 57 |       |> Enum.sum()
 58 |     end
 59 |   end
 60 | 
 61 |   @doc """
 62 |   The percentile-point function
 63 |   ## Examples
 64 |       iex> Statistics.Distributions.Hypergeometric.ppf(80, 20, 50).(0.1)
 65 |       10
 66 |   """
 67 |   @spec ppf(non_neg_integer, non_neg_integer, non_neg_integer) :: fun
 68 |   def ppf(pn, pk, n) do
 69 |     fn x ->
 70 |       ppf_tande(x, cdf(pn, pk, n), 0)
 71 |     end
 72 |   end
 73 | 
 74 |   # trial-and-error method which refines guesses
 75 |   # to arbitrary number of decimal places
 76 | 
 77 |   defp ppf_tande(x, tcdf, guess) do
 78 |     g_cdf = tcdf.(guess)
 79 | 
 80 |     cond do
 81 |       x > g_cdf ->
 82 |         ppf_tande(x, tcdf, guess + 1)
 83 | 
 84 |       x <= g_cdf ->
 85 |         guess
 86 |     end
 87 |   end
 88 | 
 89 |   @doc """
 90 |   Draw a random number from hypergeometric distribution
 91 |   """
 92 |   @spec rand(non_neg_integer, non_neg_integer, non_neg_integer) :: non_neg_integer
 93 |   def rand(pn, pk, n), do: rand(pk, pmf(pn, pk, n))
 94 | 
 95 |   defp rand(pk, rpmf) do
 96 |     x = Math.floor(Math.rand() * pk)
 97 | 
 98 |     if rpmf.(x) > Math.rand() do
 99 |       Float.round(x)
100 |     else
101 |       # keep trying
102 |       rand(pk, rpmf)
103 |     end
104 |   end
105 | end
106 | 


--------------------------------------------------------------------------------
/lib/statistics/distributions/normal.ex:
--------------------------------------------------------------------------------
  1 | defmodule Statistics.Distributions.Normal do
  2 |   @moduledoc """
  3 |   The normal, or gaussian, distribution
  4 | 
  When invoking the distribution functions without parameters,
  6 |   a distribution with mean of 0 and standard deviation of 1 is assumed.
  7 |   """
  8 | 
  9 |   alias Statistics.Math
 10 |   alias Statistics.Math.Functions
 11 | 
 12 |   @doc """
 13 |   Probability density function
 14 | 
 15 |   Roughly the expectation of a given value in the distribution
 16 | 
 17 |   ## Examples
 18 | 
 19 |       iex> Statistics.Distributions.Normal.pdf().(0)
 20 |       0.3989422804014327
 21 |       iex> Statistics.Distributions.Normal.pdf(0.2, 1).(1.3)
 22 |       0.21785217703255055
 23 | 
 24 |   """
 25 |   @spec pdf :: fun
 26 |   def pdf do
 27 |     pdf(0, 1)
 28 |   end
 29 | 
 30 |   @spec pdf(number, number) :: fun
 31 |   def pdf(mu, sigma) do
 32 |     fn x ->
 33 |       numexp = Math.pow(x - mu, 2) / (2 * Math.pow(sigma, 2))
 34 |       denom = sigma * Math.sqrt(2 * Math.pi())
 35 |       numer = Math.pow(Math.e(), numexp * -1)
 36 |       numer / denom
 37 |     end
 38 |   end
 39 | 
 40 |   @doc """
 41 |   The cumulative density function
 42 | 
 43 |   The probability that a value lies below `x`
 44 | 
 45 |   Cumulative gives a probability that a statistic
 46 |   is less than Z. This equates to the area of the distribution below Z.
 47 |   e.g:  Pr(Z = 0.69) = 0.7549. This value is usually given in Z tables.
 48 | 
 49 |   ## Examples
 50 | 
 51 |     iex> Statistics.Distributions.Normal.cdf().(2)
 52 |     0.9772499371127437
 53 |     iex> Statistics.Distributions.Normal.cdf(0,1).(0)
 54 |     0.5000000005
 55 | 
 56 |   """
 57 |   @spec cdf :: fun
 58 |   def cdf() do
 59 |     cdf(0, 1)
 60 |   end
 61 | 
 62 |   @spec cdf(number, number) :: fun
 63 |   def cdf(mu, sigma) do
 64 |     denom = sigma * Math.sqrt(2)
 65 | 
 66 |     fn x ->
 67 |       0.5 * (1.0 + Functions.erf((x - mu) / denom))
 68 |     end
 69 |   end
 70 | 
 71 |   @doc """
 72 |   The percentile-point function
 73 | 
 74 |   Get the maximum point which lies below the given probability.
 75 |   This is the inverse of the cdf
 76 | 
 77 |   ## Examples
 78 | 
 79 |       iex> Statistics.Distributions.Normal.ppf().(0.025)
 80 |       -1.96039491692534
 81 |       iex> Statistics.Distributions.Normal.ppf(7, 2.1).(0.25)
 82 |       5.584202805909036
 83 | 
 84 |   """
 85 |   @spec ppf :: fun
 86 |   def ppf() do
 87 |     ppf(0, 1)
 88 |   end
 89 | 
 90 |   @spec ppf(number, number) :: fun
 91 |   def ppf(mu, sigma) do
 92 |     res = fn p ->
 93 |       mu + p * sigma
 94 |     end
 95 | 
 96 |     fn x ->
 97 |       cond do
 98 |         x < 0.5 ->
 99 |           res.(-Functions.inv_erf(Math.sqrt(-2.0 * Math.ln(x))))
100 | 
101 |         x >= 0.5 ->
102 |           res.(Functions.inv_erf(Math.sqrt(-2.0 * Math.ln(1 - x))))
103 |       end
104 |     end
105 |   end
106 | 
107 |   @doc """
108 |   Draw a random number from a normal distribution
109 | 
110 |   `rnd/0` will return a random number from a normal distribution
111 |   with a mean of 0 and a standard deviation of 1
112 | 
113 |   `rnd/2` allows you to provide the mean and standard deviation
114 |   parameters of the distribution from which the random number is drawn
115 | 
116 |   Uses the [rejection sampling method](https://en.wikipedia.org/wiki/Rejection_sampling)
117 | 
118 |   ## Examples
119 | 
120 |       iex> Statistics.Distributions.Normal.rand()
121 |       1.5990817245679434
122 |       iex> Statistics.Distributions.Normal.rand(22, 2.3)
123 |       23.900248900049736
124 | 
125 |   """
126 |   @spec rand() :: number
127 |   def rand do
128 |     rand(0, 1)
129 |   end
130 | 
131 |   @spec rand(number, number) :: number
132 |   def rand(mu, sigma), do: rand(mu, sigma, pdf(0, 1))
133 | 
134 |   defp rand(mu, sigma, rpdf) do
135 |     # Note: an alternate method exists and may be better
136 |     # Inverse transform sampling - https://en.wikipedia.org/wiki/Inverse_transform_sampling
137 |     # ----
138 |     # Generate a random number between -10,+10
139 |     # (probability of 10 ocurring in a Normal(0,1) distribution is
140 |     # too small to calculate with the precision available to us)
141 |     x = Math.rand() * 20 - 10
142 | 
143 |     cond do
144 |       rpdf.(x) > Math.rand() ->
145 |         # transpose to specified distribution
146 |         mu - x * sigma
147 | 
148 |       true ->
149 |         # keep trying
150 |         rand(mu, sigma, rpdf)
151 |     end
152 |   end
153 | end
154 | 


--------------------------------------------------------------------------------
/lib/statistics/distributions/poisson.ex:
--------------------------------------------------------------------------------
  1 | defmodule Statistics.Distributions.Poisson do
  2 |   @moduledoc """
  The Poisson distribution is a discrete probability distribution.
  4 | 
  5 |   It models the probability of a given number of events occurring
  6 |   in a fixed interval if the events occur with a known average rate
  7 |   and are independent of the previous event.
  8 | 
  9 |   """
 10 | 
 11 |   alias Statistics.Math
 12 | 
 13 |   @doc """
 14 |   Probability mass function
 15 | 
 16 |   ## Examples
 17 | 
 18 |       iex> Statistics.Distributions.Poisson.pmf(1).(1)
 19 |       0.36787944117144233
 20 | 
 21 |   """
 22 |   @spec pmf(number) :: fun
 23 |   def pmf(lambda) do
 24 |     nexp = Math.exp(-lambda)
 25 | 
 26 |     fn k ->
 27 |       Math.pow(lambda, k) / Math.factorial(k) * nexp
 28 |     end
 29 |   end
 30 | 
 31 |   @doc """
 32 |   Get the probability that a value lies below `k`
 33 | 
 34 |   ## Examples
 35 | 
 36 |     iex> Statistics.Distributions.Poisson.cdf(1).(1)
 37 |     0.73575888234288467
 38 | 
 39 |   """
 40 |   @spec cdf(number) :: fun
 41 |   def cdf(lambda) do
 42 |     nexp = Math.exp(-1 * lambda)
 43 | 
 44 |     fn k ->
 45 |       s =
 46 |         Enum.map(0..Math.to_int(k), fn x -> Math.pow(lambda, x) / Math.factorial(x) end)
 47 |         |> Enum.sum()
 48 | 
 49 |       nexp * s
 50 |     end
 51 |   end
 52 | 
 53 |   @doc """
 54 |   The percentile-point function
 55 | 
 56 |   Get the maximum point which lies below the given probability.
 57 |   This is the inverse of the cdf and will take only positive integer values
 58 |   (but returns a float)
 59 | 
 60 |   ## Examples
 61 | 
 62 |       iex> Statistics.Distributions.Poisson.ppf(1).(0.95)
 63 |       3.0
 64 | 
 65 |   """
 66 |   @spec ppf(number) :: fun
 67 |   def ppf(lambda) do
 68 |     lcdf = cdf(lambda)
 69 | 
 70 |     fn x ->
 71 |       ppf_tande(x, lcdf, 0.0)
 72 |     end
 73 |   end
 74 | 
 75 |   # the trusty trial-and-error method
 76 |   defp ppf_tande(x, lcdf, guess) do
 77 |     if x > lcdf.(guess) do
 78 |       ppf_tande(x, lcdf, guess + 1)
 79 |     else
 80 |       guess
 81 |     end
 82 |   end
 83 | 
 84 |   @doc """
 85 |   Draw a random number from this distribution
 86 | 
 87 |   This is a discrete distribution and the values it can take are positive integers.
 88 | 
 89 |   ## Examples
 90 | 
 91 |       iex> Statistics.Distributions.Poisson.rand(1)
 92 |       1.0
 93 | 
 94 |   """
 95 |   @spec rand(number) :: number
 96 |   def rand(lambda), do: rand(lambda, pmf(lambda))
 97 | 
 98 |   defp rand(lambda, lpmf) do
 99 |     x = (Math.rand() * 100 + lambda) |> Math.floor()
100 | 
101 |     if lpmf.(x) > Math.rand() do
102 |       x
103 |     else
104 |       # keep trying
105 |       rand(lambda, lpmf)
106 |     end
107 |   end
108 | end
109 | 


--------------------------------------------------------------------------------
/lib/statistics/distributions/t.ex:
--------------------------------------------------------------------------------
  1 | defmodule Statistics.Distributions.T do
  2 |   alias Statistics.Math
  3 |   alias Statistics.Math.Functions
  4 | 
  5 |   @moduledoc """
  6 |   Student's t distribution.
  7 | 
  8 |   This distribution is always centered around 0.0 and allows a *degrees of freedom* parameter.
  9 |   """
 10 | 
 11 |   @doc """
 12 |   The probability density function
 13 | 
 14 |   ## Examples
 15 | 
 16 |       iex> Statistics.Distributions.T.pdf(3).(0)
 17 |       0.3675525969478612
 18 |       iex> Statistics.Distributions.T.pdf(1).(3.2)
 19 |       0.028319384891796327
 20 | 
 21 |   """
 22 |   @spec pdf(number) :: fun
 23 |   def pdf(df) do
 24 |     fac = Functions.gamma((df + 1) / 2) / (Math.sqrt(df * Math.pi()) * Functions.gamma(df / 2))
 25 |     exp = (df + 1) / 2 * -1
 26 | 
 27 |     fn x -> fac * Math.pow(1 + x * x / df, exp) end
 28 |   end
 29 | 
 30 |   @doc """
 31 |   The cumulative density function
 32 | 
 33 |   NOTE: this currently uses the very slow Simpson's Rule to execute
 34 |   a numerical integration of the `pdf` function to approximate
 35 |   the CDF. This leads to a trade-off between precision and speed.
 36 | 
 37 |   A robust implementation of the 2F1 hypergeometric function is
 38 |   required to properly calculate the CDF of the t distribution.
 39 | 
 40 |   ## Examples
 41 | 
 42 |       iex> Statistics.Distributions.T.cdf(3).(0)
 43 |       0.4909182507070275
 44 |       
 45 |   """
 46 |   @spec cdf(number) :: fun
 47 |   def cdf(df) do
 48 |     cpdf = pdf(df)
 49 |     fn x -> Functions.simpson(cpdf, -10000, x, 10000) end
 50 |   end
 51 | 
 52 |   # when a robust hyp2F1 materialises, use this implementation
 53 |   # defp cdf_hyp2f1(x, df) do
 54 |   #  p1 = 0.5 + x * Functions.gamma((df+1)/2)
 55 |   #  p2n = Math.hyp2f1(0.5, ((df+1)/2), 1.5, -1*Math.pow(x,2)/df)
 56 |   #  p2d = Math.sqrt(Math.pi*df) * Functions.gamma(df/2)
 57 |   #  p1 * (p2n / p2d)
 58 |   # end
 59 | 
 60 |   @doc """
 61 |   The percentile-point function
 62 | 
 63 |   NOTE: this is very slow due to the current implementation of the CDF
 64 | 
 65 |   """
 66 |   @spec ppf(number) :: fun
 67 |   def ppf(df) do
 68 |     fn x ->
 69 |       ppf_tande(x, cdf(df), 4)
 70 |     end
 71 |   end
 72 | 
 73 |   # trial-and-error method which refines guesses
 74 |   # to arbitrary number of decimal places
 75 |   defp ppf_tande(x, pcdf, precision) do
 76 |     ppf_tande(x, pcdf, -10, precision + 2, 0)
 77 |   end
 78 | 
 79 |   defp ppf_tande(_, _, g, precision, precision) do
 80 |     g
 81 |   end
 82 | 
 83 |   defp ppf_tande(x, pcdf, g, precision, p) do
 84 |     increment = 100 / Math.pow(10, p)
 85 |     guess = g + increment
 86 | 
 87 |     if x < pcdf.(guess) do
 88 |       ppf_tande(x, pcdf, g, precision, p + 1)
 89 |     else
 90 |       ppf_tande(x, pcdf, guess, precision, p)
 91 |     end
 92 |   end
 93 | 
 94 |   @doc """
 95 |   Draw a random number from a t distribution with `df` degrees of freedom
 96 |   """
 97 |   @spec rand(number) :: number
 98 |   def rand(df) do
 99 |     df |> pdf() |> randf()
100 |   end
101 | 
102 |   # candidates are spread over a window 50 wide (Math.rand() * 50 - 25)
103 |   # because the t-dist is fatter-tailed than normal
104 |   defp randf(rpdf) do
105 |     candidate = Math.rand() * 50 - 25
106 |     density = rpdf.(candidate)
107 | 
108 |     # keep the candidate when its density beats a fresh draw, else try again
109 |     if density > Math.rand(), do: candidate, else: randf(rpdf)
110 |   end
111 | end
112 | 


--------------------------------------------------------------------------------
/lib/statistics/tests/t_test.ex:
--------------------------------------------------------------------------------
 1 | defmodule Statistics.Tests.TTest do
 2 |   import Statistics
 3 |   import Statistics.Math
 4 |   alias Statistics.Distributions.T
 5 | 
 6 |   @moduledoc """
 7 |   Student's t test
 8 | 
 9 |   """
10 | 
11 |   @doc """
12 |   A two-sided test for the null hypothesis that the
13 |   expected value (mean) of a sample of independent
14 |   observations `list` is equal to the given population mean, `popmean`.
15 | 
16 |   Returns a map holding the _t_ statistic (`:t`) and the _p_ value (`:p`).
17 | 
18 |   ## Example
19 | 
20 |       iex> Statistics.Tests.TTest.one_sample([1,2,3,2,1], 3)
21 |       %{p: 0.023206570788795993, t: -3.585685828003181}
22 | 
23 |   """
24 |   @spec one_sample([number], number) :: %{t: number, p: number}
25 |   def one_sample(list, popmean) do
26 |     # `length/1` walks the whole list, so measure it only once
27 |     n = length(list)
28 |     t = (mean(list) - popmean) / (stdev(list) / sqrt(n))
29 |     p = get_t_prob(t, n - 1)
30 |     %{t: t, p: p}
31 |   end
32 | 
33 |   @doc """
34 |   A two-sided test for the null hypothesis that the
35 |   mean of `list1` is equal to the mean of `list2`.
36 | 
37 |   The variance of the lists should be equal but the
38 |   sample size of each list can be different.
39 | 
40 |   Returns a map holding the _t_ statistic (`:t`) and the _p_ value (`:p`).
41 | 
42 |   ## Example
43 | 
44 |       iex> Statistics.Tests.TTest.ind_samples([1,2,3,2,1], [3,2,4,3,5])
45 |       %{p: 0.022802155958137702, t: -2.82842712474619}
46 | 
47 |       iex> Statistics.Tests.TTest.ind_samples([1,2,3,2,1], [3,2,4,3,5,4,5,6])
48 |       %{p: 0.0044530673387188, t: -3.5858542135407596}
49 | 
50 |   """
51 |   @spec ind_samples([number], [number]) :: %{t: number, p: number}
52 |   def ind_samples(list1, list2) do
53 |     n1 = length(list1)
54 |     n2 = length(list2)
55 |     df = n1 + n2 - 2
56 |     mu1 = mean(list1)
57 |     mu2 = mean(list2)
58 | 
59 |     # calculate pooled standard deviation and
60 |     # sample proportion differently when
61 |     # sample sizes are unequal
62 |     {sp, sz} =
63 |       if n1 == n2 do
64 |         {sqrt((variance(list1) + variance(list2)) / 2), sqrt(2 / n1)}
65 |       else
66 |         # weight variances by sample size
67 |         adj_var1 = (n1 - 1) * variance(list1)
68 |         adj_var2 = (n2 - 1) * variance(list2)
69 |         {sqrt((adj_var1 + adj_var2) / df), sqrt(1 / n1 + 1 / n2)}
70 |       end
71 | 
72 |     t = (mu1 - mu2) / (sp * sz)
73 |     p = get_t_prob(t, df)
74 |     %{t: t, p: p}
75 |   end
76 | 
77 |   # Two-sided p value for the statistic `t` with `df` degrees of freedom:
78 |   # twice the tail area lying beyond `t`, reported as 1.0 once it would reach 1
79 |   defp get_t_prob(t, df) do
80 |     c = T.cdf(df).(t)
81 |     tail = if t < 0.0, do: c, else: 1 - c
82 | 
83 |     # two-sided test
84 |     if tail < 0.5, do: 2 * tail, else: 1.0
85 |   end
86 | end
87 | 


--------------------------------------------------------------------------------
/mix.exs:
--------------------------------------------------------------------------------
 1 | defmodule Statistics.Mixfile do
 2 |   use Mix.Project
 3 | 
 4 |   @version "0.6.3"
 5 | 
 6 |   # Mix project definition: app name, version, required Elixir and package metadata
 7 |   def project do
 8 |     [
 9 |       app: :statistics,
10 |       version: @version,
11 |       elixir: ">= 1.11.4",
12 |       description: description(),
13 |       package: package(),
14 |       deps: deps()
15 |     ]
16 |   end
17 | 
18 |   # no application options are set
19 |   def application, do: []
20 | 
21 |   # ex_doc is restricted to the :dev environment and is not a runtime dependency
22 |   defp deps, do: [{:ex_doc, "~> 0.31", only: :dev, runtime: false}]
23 | 
24 |   defp description do
25 |     "Functions for descriptive statistics and common distributions\n"
26 |   end
27 | 
28 |   # package metadata: shipped files, maintainers, license and links
29 |   defp package do
30 |     [
31 |       files: ~w(lib mix.exs README* LICENSE*),
32 |       maintainers: ["Max Sharples", "Kash Nouroozi"],
33 |       licenses: ["Apache-2.0"],
34 |       links: %{"GitHub" => "https://github.com/msharp/elixir-statistics"}
35 |     ]
36 |   end
37 | end
38 | 


--------------------------------------------------------------------------------
/package.exs:
--------------------------------------------------------------------------------
 1 | # Package descriptor built with `Expm.Package.new/1`; the version is
 2 | # read from a `VERSION` file in the current working directory.
 3 | # NOTE(review): mix.exs declares the license as Apache-2.0 while this
 4 | # descriptor says MIT - confirm which one is intended.
 5 | Expm.Package.new(
 6 |     name: "statistics",
 7 |     description: "General statistical functions",
 8 |     keywords: ["statistics","stats"],
 9 |     # String.strip/1 is deprecated; String.trim/1 is its replacement
10 |     version: "VERSION" |> File.read!() |> String.trim(),
11 |     licenses: [[name: "MIT"]],
12 |     maintainers: [[name: "Max Sharples", email: "maxsharples@gmail.com"]],
13 |     repositories: [[github: "msharp/elixir-statistics"]]
14 | )
15 | 


--------------------------------------------------------------------------------
/test/beta_distribution_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule BetaDistributionTest do
 2 |   use ExUnit.Case, async: true
 3 |   doctest Statistics.Distributions.Beta, except: [rand: 2]
 4 | 
 5 |   alias Statistics.Distributions.Beta
 6 | 
 7 |   test "output of the pdf function" do
 8 |     # each row: the two distribution arguments, x and the expected density
 9 |     expectations = [
10 |       {1, 3, 0.6, 0.48000000000000054},
11 |       {2, 5, 0.2, 2.457600000000004},
12 |       {2, 2, 0.8, 0.9600000000000015}
13 |     ]
14 | 
15 |     for {a, b, x, density} <- expectations do
16 |       assert Beta.pdf(a, b).(x) == density
17 |     end
18 |   end
19 | 
20 |   test "return a cdf " do
21 |     # each row: the two distribution arguments, x and the expected cumulative value
22 |     expectations = [
23 |       {1, 1, 0.5, 0.4999833333333332},
24 |       {2, 10, 0.1, 0.30264311979999975},
25 |       {2, 5, 0.2, 0.34464000000000033}
26 |     ]
27 | 
28 |     for {a, b, x, cumulative} <- expectations do
29 |       assert Beta.cdf(a, b).(x) == cumulative
30 |     end
31 |   end
32 | 
33 |   test "return a random number from the distribution" do
34 |     draw = Beta.rand(1, 2)
35 |     assert is_float(draw)
36 |   end
37 | 
38 |   test "get the percentile point value" do
39 |     percentile_point = Beta.ppf(1, 2)
40 |     assert percentile_point.(0.1) == 0.05131850509960005
41 |     # the PPF is expensive - don't run all tests every time
42 |     # assert Beta.ppf(2, 5).(0.5) ==  0.26444998329559966
43 |     # assert Beta.ppf(2, 10).(0.9) == 0.3102434478125001
44 |   end
45 | end
46 | 


--------------------------------------------------------------------------------
/test/binomial_distribution_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule BinomialDistributionTest do
 2 |   use ExUnit.Case, async: true
 3 |   doctest Statistics.Distributions.Binomial, except: [rand: 2]
 4 | 
 5 |   alias Statistics.Distributions.Binomial, as: Binom
 6 |   alias Statistics.Math
 7 | 
 8 |   test "output of the pmf function" do
 9 |     # each row: the two distribution arguments, the point and the expected result
10 |     expectations = [
11 |       {1, 0.5, 0, 0.5},
12 |       {1, 0.5, 1, 0.5},
13 |       {4, 0.5, 4, 0.0625},
14 |       {4, 0.5, 2, 0.375},
15 |       {4, 0.5, 0.9, nil},
16 |       {100, 0.2, 20, 0.09930021480882524},
17 |       {5000, 0.0001, 1, 0.30328807662005114}
18 |     ]
19 | 
20 |     for {n, p, k, expected} <- expectations do
21 |       assert Binom.pmf(n, p).(k) == expected
22 |     end
23 |   end
24 | 
25 |   test "return a cdf " do
26 |     for {n, p, k, expected} <- [{4, 0.5, 2, 0.6875}, {100, 0.2, 20, 0.5594615848734007}] do
27 |       assert Binom.cdf(n, p).(k) == expected
28 |     end
29 |   end
30 | 
31 |   test "get the percentile point value" do
32 |     for {n, p, q, expected} <- [{10, 0.5, 0.5, 5.0}, {50, 0.2, 0.6, 11.0}] do
33 |       assert Binom.ppf(n, p).(q) == expected
34 |     end
35 |   end
36 | 
37 |   test "return a random number from binomial distribution" do
38 |     trials = 100
39 |     draw = Binom.rand(trials, 0.5)
40 |     # the draw comes back as a float ...
41 |     assert is_float(draw)
42 |     # ... whose value is a whole number
43 |     assert draw == Math.to_int(draw)
44 |     # bounded above by the number of trials
45 |     assert draw <= trials
46 |     # and never negative
47 |     assert draw >= 0
48 |   end
49 | end
50 | 


--------------------------------------------------------------------------------
/test/chisq_distribution_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule ChisqDistributionTest do
 2 |   use ExUnit.Case, async: true
 3 |   doctest Statistics.Distributions.Chisq, except: [rand: 1]
 4 | 
 5 |   alias Statistics.Distributions.Chisq
 6 | 
 7 |   test "output of the pdf function" do
 8 |     for {df, x, expected} <- [{2, 5, 0.0410424993119494}, {22, 12, 0.020651546706168852}] do
 9 |       assert Chisq.pdf(df).(x) == expected
10 |     end
11 |   end
12 | 
13 |   test "return a cdf " do
14 |     # each row: the distribution argument, x and the expected cumulative value
15 |     expectations = [
16 |       {1, 1, 0.6826894921370861},
17 |       {2, 2, 0.6321205588285578},
18 |       {23, 16.8, 0.18105083862291943},
19 |       {77, 89.999, 0.8524000316322364}
20 |     ]
21 | 
22 |     for {df, x, expected} <- expectations do
23 |       assert Chisq.cdf(df).(x) == expected
24 |     end
25 |   end
26 | 
27 |   test "return a random number from the distribution" do
28 |     draw = Chisq.rand(2)
29 |     assert is_float(draw)
30 |     # rands = for _ <- 0..10000, do: Chisq.rand(1)
31 |     # assert Statistics.mean(rands) == 1
32 |   end
33 | 
34 |   test "get the percentile point value" do
35 |     for {df, q, expected} <- [{77, 0.95, 98.48438345933911}, {7, 0.05, 2.167349909298}] do
36 |       assert Chisq.ppf(df).(q) == expected
37 |     end
38 |   end
39 | end
40 | 


--------------------------------------------------------------------------------
/test/descriptive_test.exs:
--------------------------------------------------------------------------------
  1 | defmodule DescriptiveTest do
  2 |   use ExUnit.Case, async: true
  3 |   doctest Statistics
  4 | 
  5 |   # fixtures shared by the tests below
  6 |   @null []
  7 | 
  8 |   @a Enum.to_list(1..9)
  9 |   @b [4, 3, 3, 4, 5, 6, 7, 6, 5]
 10 |   @c Enum.to_list(1..15)
 11 |   @d @a ++ [8, 7, 6, 5, 4, 3]
 12 |   @e [1, 2, 3, 2, 1]
 13 |   @f Enum.to_list(1..6)
 14 |   @g [1]
 15 | 
 16 |   @x [1, 2, 3, 4, 12, 4, 2, 4, 6, 3, 5, 6, 7, 4, 7, 8, 2, 5]
 17 |   @y [1, 3, 5, 6, 5, 2, 7, 4, 6, 8, 2, 3, 9, 5, 2, 8, 9, 4]
 18 | 
 19 |   test "sum a list" do
 20 |     total = Statistics.sum(@a)
 21 |     assert total == 45
 22 |   end
 23 | 
 24 |   test "calculate mean" do
 25 |     assert is_nil(Statistics.mean(@null))
 26 |     assert Statistics.mean([1]) == 1
 27 |     assert Statistics.mean(@a) == 5
 28 |   end
 29 | 
 30 |   test "get mode" do
 31 |     assert is_nil(Statistics.mode(@null))
 32 |     assert Statistics.mode(@a ++ [2, 2]) == 2
 33 |   end
 34 | 
 35 |   test "calculate median" do
 36 |     assert is_nil(Statistics.median(@null))
 37 |     assert Statistics.median(@a) == 5
 38 |     assert Statistics.median(@a -- [9]) == 4.5
 39 |   end
 40 | 
 41 |   test "get maximum" do
 42 |     assert is_nil(Statistics.max(@null))
 43 |     assert Statistics.max(@a ++ [99]) == 99
 44 |   end
 45 | 
 46 |   test "get minimum" do
 47 |     assert is_nil(Statistics.min(@null))
 48 |     assert Statistics.min([23, 45, 34, 53, 44, 65, 99, 1, 74, 32, 69]) == 1
 49 |   end
 50 | 
 51 |   test "get first quartile point" do
 52 |     for list <- [@a ++ [5], @a] do
 53 |       assert Statistics.quartile(list, :first) == 3
 54 |     end
 55 |   end
 56 | 
 57 |   test "get third quartile point" do
 58 |     for list <- [@a ++ [5], @a] do
 59 |       assert Statistics.quartile(list, :third) == 7
 60 |     end
 61 |   end
 62 | 
 63 |   test "get nth percentile score" do
 64 |     assert is_nil(Statistics.percentile(@null, 12))
 65 | 
 66 |     # each row: the list, the requested percentile and the expected score
 67 |     expectations = [
 68 |       {@a, 0, 1},
 69 |       {@a, 20, 2.6},
 70 |       {@a, 80, 7.4},
 71 |       {@a, 100, 9},
 72 |       {@g, 50, 1}
 73 |     ]
 74 | 
 75 |     for {list, nth, score} <- expectations do
 76 |       assert Statistics.percentile(list, nth) == score
 77 |     end
 78 |   end
 79 | 
 80 |   test "get range" do
 81 |     assert is_nil(Statistics.range(@null))
 82 |     assert Statistics.range(@a) == 8
 83 |   end
 84 | 
 85 |   test "get inter-quartile range" do
 86 |     assert is_nil(Statistics.iqr(@null))
 87 |     assert Statistics.iqr(@a) == 4
 88 |   end
 89 | 
 90 |   test "calculate variance" do
 91 |     assert is_nil(Statistics.variance(@null))
 92 |     assert Statistics.variance(@b) == 1.7283950617283952
 93 |   end
 94 | 
 95 |   test "calculate standard deviation" do
 96 |     assert is_nil(Statistics.stdev(@null))
 97 |     assert Statistics.stdev(@b) == 1.314684396244359
 98 |   end
 99 | 
100 |   test "calculate trimmed mean" do
101 |     assert is_nil(Statistics.trimmed_mean(@null, {1, 4}))
102 |     assert Statistics.trimmed_mean(@c, {4, 9}) == 6.5
103 | 
104 |     adjusted = (@c ++ [5, 6, 7, 8]) -- [9]
105 |     assert Statistics.trimmed_mean(adjusted, :iqr) == 7.3
106 |   end
107 | 
108 |   test "calculate harmonic mean" do
109 |     assert is_nil(Statistics.harmonic_mean(@null))
110 |     assert Statistics.harmonic_mean(@c) == 4.5204836768674568
111 |   end
112 | 
113 |   test "calculate geometric mean" do
114 |     assert is_nil(Statistics.geometric_mean(@null))
115 |     assert Statistics.geometric_mean(@f) == 2.9937951655239088
116 |   end
117 | 
118 |   # moment/skew/kurtosis numbers match python/scipy
119 | 
120 |   test "calculate moment about the mean" do
121 |     assert is_nil(Statistics.moment(@null, 3))
122 | 
123 |     for {order, expected} <- [{1, 0.0}, {2, 5.2266666666666675}, {3, -1.3440000000000025}] do
124 |       assert Statistics.moment(@d, order) == expected
125 |     end
126 |   end
127 | 
128 |   test "calculate skewness" do
129 |     assert is_nil(Statistics.skew(@null))
130 |     assert Statistics.skew(@e) == 0.3436215967445454
131 |   end
132 | 
133 |   test "calculate kurtosis (fisher)" do
134 |     assert is_nil(Statistics.kurtosis(@null))
135 |     assert Statistics.kurtosis(@e) == -1.1530612244897964
136 |   end
137 | 
138 |   test "calculate standard score for items in a list" do
139 |     scores = Statistics.zscore([3, 2, 3, 4, 5, 6, 5, 4, 3])
140 | 
141 |     assert scores == [
142 |              -0.7427813527082074,
143 |              -1.5784103745049407,
144 |              -0.7427813527082074,
145 |              0.09284766908852597,
146 |              0.9284766908852594,
147 |              1.7641057126819928,
148 |              0.9284766908852594,
149 |              0.09284766908852597,
150 |              -0.7427813527082074
151 |            ]
152 |   end
153 | 
154 |   test "calculate the correlation of 2 lists" do
155 |     assert Statistics.correlation(@x, @y) == 0.09315273948675289
156 |     # an empty second list matches no function clause
157 |     assert_raise FunctionClauseError, fn -> Statistics.correlation(@x, @null) end
158 |   end
159 | 
160 |   test "calculate the covariance of 2 lists" do
161 |     assert Statistics.covariance(@x, @y) == 0.6307189542483661
162 |     # an empty second list matches no function clause
163 |     assert_raise FunctionClauseError, fn -> Statistics.covariance(@x, @null) end
164 |   end
165 | end
166 | 


--------------------------------------------------------------------------------
/test/exponential_distribution_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule ExponentialDistributionTest do
 2 |   use ExUnit.Case, async: true
 3 |   doctest Statistics.Distributions.Exponential, except: [rand: 1, rand: 0]
 4 | 
 5 |   alias Statistics.Distributions.Exponential
 6 | 
 7 |   test "output of the pdf function" do
 8 |     default_pdf = Exponential.pdf()
 9 |     assert default_pdf.(-1) == 0
10 |     assert default_pdf.(1) == 0.36787944117144233
11 | 
12 |     # each row: the distribution argument, x and the expected result
13 |     for {lambda, x, expected} <- [
14 |           {0, 1, :nan},
15 |           {-1, 1, :nan},
16 |           {3, 2, 0.0074362565299990755},
17 |           {2, 9, 3.0459959489425258e-08}
18 |         ] do
19 |       assert Exponential.pdf(lambda).(x) == expected
20 |     end
21 |   end
22 | 
23 |   test "return a cdf " do
24 |     default_cdf = Exponential.cdf()
25 |     assert default_cdf.(-1) == 0
26 |     assert default_cdf.(1) == 0.63212055882855767
27 | 
28 |     for {lambda, x, expected} <- [
29 |           {0, 1, :nan},
30 |           {-1, 1, :nan},
31 |           {3, 2, 0.9975212478233336},
32 |           {2, 9, 0.99999998477002028}
33 |         ] do
34 |       assert Exponential.cdf(lambda).(x) == expected
35 |     end
36 |   end
37 | 
38 |   test "return a random number from the distribution" do
39 |     draw = Exponential.rand(2)
40 |     assert is_float(draw)
41 |   end
42 | 
43 |   test "get the percentile point value" do
44 |     default_ppf = Exponential.ppf()
45 |     assert default_ppf.(-1) == :nan
46 |     assert default_ppf.(0) == 0
47 | 
48 |     for {lambda, q, expected} <- [
49 |           {1, 1.2, :nan},
50 |           {1, 1, :inf},
51 |           {1, 0.5, 0.6931471805599453},
52 |           {4, 0.9, 0.57564627324851148}
53 |         ] do
54 |       assert Exponential.ppf(lambda).(q) == expected
55 |     end
56 |   end
57 | 
58 |   test "generating many random variates gives roughly the expected mean" do
59 |     n = 100_000
60 |     lambda = 0.002
61 |     expected_mean = 1 / lambda
62 | 
63 |     total =
64 |       1..n
65 |       |> Enum.map(fn _ -> Exponential.rand(lambda) end)
66 |       |> Enum.sum()
67 | 
68 |     sample_mean = total / n
69 | 
70 |     # the sample mean must land within 5% of the expected mean
71 |     assert 0.95 * expected_mean <= sample_mean
72 |     assert sample_mean <= 1.05 * expected_mean
73 |   end
74 | end
75 | 


--------------------------------------------------------------------------------
/test/f_distribution_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule FDistributionTest do
 2 |   use ExUnit.Case, async: true
 3 |   doctest Statistics.Distributions.F, except: [rand: 2, ppf: 2]
 4 | 
 5 |   alias Statistics.Distributions.F
 6 | 
 7 |   test "output of the pdf function" do
 8 |     density = F.pdf(1, 1)
 9 |     assert density.(1) == 0.15915494309189537
10 |   end
11 | 
12 |   test "return a cdf " do
13 |     cumulative = F.cdf(1, 1)
14 |     assert cumulative.(1) == 0.4971668763845647
15 |   end
16 | 
17 |   test "return a random number from the distribution" do
18 |     draw = F.rand(1, 1)
19 |     assert is_float(draw)
20 |   end
21 | 
22 |   test "get the percentile point value" do
23 |     percentile_point = F.ppf(1, 1)
24 |     assert percentile_point.(0.05) == 0.0048621122317455395
25 |   end
26 | end
27 | 


--------------------------------------------------------------------------------
/test/hypergeometric_distribution_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule HypergeometricDistributionTest do
 2 |   use ExUnit.Case, async: true
 3 |   doctest Statistics.Distributions.Hypergeometric, except: [rand: 2]
 4 | 
 5 |   alias Statistics.Distributions.Hypergeometric, as: Hyper
 6 |   alias Statistics.Math
 7 | 
 8 |   test "output of the pmf function" do
 9 |     # each row: the three distribution arguments, the point and the expected mass
10 |     expectations = [
11 |       {52, 5, 26, 2, 0.3251300520208083},
12 |       {20, 10, 5, 1, 0.13544891640866874},
13 |       {10, 10, 2, 1, 0.0},
14 |       {10, 10, 2, 2, 1.0}
15 |     ]
16 | 
17 |     for {a, b, c, k, expected} <- expectations do
18 |       assert Hyper.pmf(a, b, c).(k) == expected
19 |     end
20 |   end
21 | 
22 |   test "return a cdf " do
23 |     expectations = [
24 |       {52, 5, 13, 2, 0.9072328931572629},
25 |       {80, 50, 23, 10, 0.02480510161897441},
26 |       {22, 9, 14, 2, 0.0014916971573318324}
27 |     ]
28 | 
29 |     for {a, b, c, k, expected} <- expectations do
30 |       assert Hyper.cdf(a, b, c).(k) == expected
31 |     end
32 |   end
33 | 
34 |   test "get the percentile point value" do
35 |     for {a, b, c, q, expected} <- [{80, 20, 50, 0.1, 10.0}, {70, 10, 30, 0.75, 5.0}] do
36 |       assert Hyper.ppf(a, b, c).(q) == expected
37 |     end
38 |   end
39 | 
40 |   test "return a random number from hypergeometric distribution" do
41 |     {pn, pk, n} = {100, 5, 10}
42 |     draw = Hyper.rand(pn, pk, n)
43 |     # the draw comes back as a float ...
44 |     assert is_float(draw)
45 |     # ... whose value is a whole number
46 |     assert draw == Math.to_int(draw)
47 |     # bounded above by n
48 |     assert draw <= n
49 |     # and never negative
50 |     assert draw >= 0
51 |   end
52 | end
53 | 


--------------------------------------------------------------------------------
/test/math_functions_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule MathFunctionsTest do
 2 |   use ExUnit.Case, async: true
 3 |   doctest Statistics.Math.Functions
 4 | 
 5 |   alias Statistics.Math.Functions
 6 |   alias Statistics.Math
 7 | 
 8 |   alias Statistics.Distributions.T
 9 | 
10 |   test "gamma function" do
11 |     for {x, expected} <- [{22, 5.109094217170951e19}, {0.02, 49.44221016319569}] do
12 |       assert Functions.gamma(x) == expected
13 |     end
14 |   end
15 | 
16 |   test "incomplete gamma function" do
17 |     expectations = [
18 |       {2, 2, 0.5939941502901618},
19 |       {1, 2, 0.8646647167633872},
20 |       {1, 1, 0.63212055882855778}
21 |     ]
22 | 
23 |     for {a, b, expected} <- expectations do
24 |       assert Functions.gammainc(a, b) == expected
25 |     end
26 | 
27 |     # not a complete solution ... some cases that do not work
28 |     # assert Functions.gammainc(4,1) == 0.018988156876153808
29 |     # assert Functions.gammainc(0.1,1) == 0.97587265627367215
30 |   end
31 | 
32 |   test "beta function" do
33 |     expectations = [
34 |       {1, 2, 0.4999999999999996},
35 |       {2, 2, 0.16666666666666638},
36 |       {0.05, 1, 20.000000000000007}
37 |     ]
38 | 
39 |     for {a, b, expected} <- expectations do
40 |       assert Functions.beta(a, b) == expected
41 |     end
42 |   end
43 | 
44 |   test "hypergeometric 2F1 function" do
45 |     # This is not a correct implementation, fails in many cases.
46 |     # assert Functions.hyp2f1(0.2, 3, 0.2, 0.2) == 1.9531249999999998
47 |     for {b, expected} <- [{2, 3.999999999999955}, {1, 1.9999999999999996}] do
48 |       assert Functions.hyp2f1(1, b, 1, 0.5) == expected
49 |     end
50 |   end
51 | 
52 |   test "simpsons numeric integration rule" do
53 |     ninth_power = &Math.pow(&1, 9)
54 |     area = Functions.simpson(ninth_power, 0, 10, 100_000)
55 |     assert Math.round(area, 1) == 1_000_000_000.0
56 | 
57 |     # integral of t.pdf(x, 1) at 2 and -2
58 |     density = T.pdf(1)
59 |     assert Functions.simpson(density, -10000, 2, 100_000) == 0.8523845106569062
60 |     assert Functions.simpson(density, -10000, -2, 100_000) == 0.14755182730100083
61 |   end
62 | end
63 | 


--------------------------------------------------------------------------------
/test/math_utils_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule MathTest do
 2 |   use ExUnit.Case, async: true
 3 |   doctest Statistics.Math
 4 | 
 5 |   alias Statistics.Math
 6 | 
 7 |   test "square root" do
 8 |     for {x, expected} <- [{9, 3}, {99, 9.9498743710662}] do
 9 |       assert Math.sqrt(x) == expected
10 |     end
11 |   end
12 | 
13 |   test "raise to a power" do
14 |     # each row: base, exponent and the expected power
15 |     expectations = [
16 |       {3, 2, 9},
17 |       {2, -1, 0.5},
18 |       {-0.5, -0.5, -1.4142135623730951},
19 |       {99, 3, 970_299}
20 |     ]
21 | 
22 |     for {base, exponent, expected} <- expectations do
23 |       assert Math.pow(base, exponent) == expected
24 |     end
25 |   end
26 | 
27 |   test "constant e" do
28 |     assert Math.e() == 2.718281828459045
29 |   end
30 | 
31 |   test "constant pi" do
32 |     assert Math.pi() == 3.141592653589793
33 |   end
34 | 
35 |   test "natural log" do
36 |     for {x, expected} <- [{2, 0.6931471805599453}, {99, 4.59511985013459}] do
37 |       assert Math.ln(x) == expected
38 |     end
39 |   end
40 | 
41 |   test "exponent function" do
42 |     assert Math.exp(2) == 7.38905609893065
43 |   end
44 | 
45 |   test "round a decimal" do
46 |     # each row: value, number of decimal places and the expected rounding
47 |     expectations = [
48 |       {99.999999, 3, 100},
49 |       {0.123456, 4, 0.1235},
50 |       {0.123436, 4, 0.1234},
51 |       {1.123456, 0, 1}
52 |     ]
53 | 
54 |     for {value, places, expected} <- expectations do
55 |       assert Math.round(value, places) == expected
56 |     end
57 |   end
58 | 
59 |   test "get absolute value" do
60 |     for {x, expected} <- [{-2, 2}, {2.2, 2.2}] do
61 |       assert Math.abs(x) == expected
62 |     end
63 |   end
64 | 
65 |   test "calculate factorial" do
66 |     for {x, expected} <- [{0, 1}, {1.0, 1}, {5, 120}, {11, 39_916_800}] do
67 |       assert Math.factorial(x) == expected
68 |     end
69 |   end
70 | 
71 |   test "get the floor as a float" do
72 |     for {x, expected} <- [{2, 2.0}, {2.9999, 2}, {-2.2, -3.0}] do
73 |       assert Math.floor(x) == expected
74 |     end
75 |   end
76 | 
77 |   test "get the ceiling as a float" do
78 |     for {x, expected} <- [{2, 2.0}, {2.9999, 3.0}, {-2.2, -2.0}] do
79 |       assert Math.ceil(x) == expected
80 |     end
81 |   end
82 | 
83 |   test "turn a float into an integer" do
84 |     for {x, expected} <- [{2, 2}, {2.2, 2}, {599.9, 599}] do
85 |       assert Math.to_int(x) == expected
86 |     end
87 |   end
88 | 
89 |   test "calculate K permutations for n items" do
90 |     for {n, k, expected} <- [{1, 2, 0}, {1, -1, 0}, {-1, 1, 0}, {2, 2, 2}, {20, 5, 1_860_480}] do
91 |       assert Math.permutation(n, k) == expected
92 |     end
93 |   end
94 | 
95 |   test "calculate K combinations for n items" do
96 |     for {n, k, expected} <- [{1, 2, 0}, {1, -1, 0}, {-1, 1, 0}, {2, 2, 1}, {20, 5, 15504}] do
97 |       assert Math.combination(n, k) == expected
98 |     end
99 |   end
100 | end
101 | 


--------------------------------------------------------------------------------
/test/normal_distribution_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule NormalDistributionTest do
 2 |   use ExUnit.Case, async: true
 3 |   doctest Statistics.Distributions.Normal, except: [rand: 0, rand: 3]
 4 | 
 5 |   alias Statistics.Distributions.Normal, as: Norm
 6 |   alias Statistics.Math
 7 | 
 8 |   # to mitigate the vagaries of floating-point math
 9 |   # and rounding errors, compare both values after rounding
10 |   # them to `precision` decimal places (4 by default)
11 |   def assert_p(left, right, precision \\ 4) do
12 |     [rounded_left, rounded_right] = Enum.map([left, right], &Math.round(&1, precision))
13 |     assert rounded_left == rounded_right
14 |   end
15 | 
16 |   test "output of the pdf function" do
17 |     standard = Norm.pdf()
18 |     assert standard.(0) == 0.3989422804014327
19 |     assert Norm.pdf(0.2, 1).(3) == 0.00791545158297997
20 |     assert standard.(-1) == 0.24197072451914337
21 |     assert Norm.pdf(23.5, 1.7).(22.0) == 0.15900173884840293
22 |   end
23 | 
24 |   test "return a cdf " do
25 |     standard = Norm.cdf()
26 |     shifted = Norm.cdf(2, 2.5)
27 | 
28 |     assert standard.(2) == 0.9772499371127437
29 |     assert_p(standard.(0), 0.5)
30 |     assert shifted.(2.8) == 0.6255157658802836
31 |     assert_p(shifted.(2), 0.5)
32 |   end
33 | 
34 |   test "return a normally-distributed random number" do
35 |     assert is_float(Norm.rand())
36 |     draws = Enum.map(0..10000, fn _ -> Norm.rand(5, 1.5) end)
37 |     assert_p(Statistics.mean(draws), 5, 1)
38 |     assert_p(Statistics.stdev(draws), 1.5, 1)
39 |   end
40 | 
41 |   test "get the percentile point value" do
42 |     standard = Norm.ppf()
43 | 
44 |     for {q, expected} <- [
45 |           {0.975, 1.9603949169253396},
46 |           {0.025, -1.96039491692534},
47 |           {0.75, 0.6741891400433162}
48 |         ] do
49 |       assert standard.(q) == expected
50 |     end
51 | 
52 |     assert Norm.ppf(7, 2.1).(0.25) == 5.584202805909036
53 |     assert Norm.ppf(37.66, 1.31).(0.95) == 39.81522698658839
54 |   end
55 | end
56 | 


--------------------------------------------------------------------------------
/test/poisson_distribution_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule PoissonDistributionTest do
 2 |   use ExUnit.Case, async: true
 3 |   doctest Statistics.Distributions.Poisson, except: [rand: 1]
 4 | 
 5 |   alias Statistics.Distributions.Poisson
 6 |   alias Statistics.Math
 7 | 
 8 |   test "output of the pmf function" do
 9 |     # each row: the distribution argument, the point and the expected mass
10 |     expectations = [
11 |       {1, 1, 0.36787944117144233},
12 |       {10, 10, 0.1251100357211333},
13 |       {10, 4, 0.018916637401035358}
14 |     ]
15 | 
16 |     for {lambda, k, expected} <- expectations do
17 |       assert Poisson.pmf(lambda).(k) == expected
18 |     end
19 |   end
20 | 
21 |   test "return a cdf " do
22 |     for {lambda, k, expected} <- [{1, 1, 0.73575888234288467}, {10, 5, 0.06708596287903179}] do
23 |       assert Poisson.cdf(lambda).(k) == expected
24 |     end
25 |   end
26 | 
27 |   test "get the percentile point value" do
28 |     expectations = [
29 |       {1, 0.95, 3.0},
30 |       {10, 0.05, 5.0},
31 |       {32, 0.75, 36.0},
32 |       {62, 0.05, 49.0}
33 |     ]
34 | 
35 |     for {lambda, q, expected} <- expectations do
36 |       assert Poisson.ppf(lambda).(q) == expected
37 |     end
38 |   end
39 | 
40 |   test "random number generation" do
41 |     draw = Poisson.rand(10)
42 |     # flooring must leave the draw unchanged
43 |     assert Math.floor(draw) == draw
44 |   end
45 | end
46 | 


--------------------------------------------------------------------------------
/test/t_distribution_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule TDistributionTest do
 2 |   use ExUnit.Case, async: true
 3 |   # `T.rand` takes one argument (the degrees of freedom), so that is the
 4 |   # arity to leave out of the doctests
 5 |   doctest Statistics.Distributions.T, except: [rand: 1]
 6 | 
 7 |   alias Statistics.Distributions.T
 8 | 
 9 |   test "output of the pdf function" do
10 |     assert T.pdf(3).(0) == 0.3675525969478612
11 |     assert T.pdf(7).(0.1) == 0.38279933426055135
12 |     assert T.pdf(77).(0.1) == 0.39564030492250557
13 |   end
14 | 
15 |   test "return a cdf " do
16 |     assert T.cdf(3).(2) == 0.9115868129912105
17 |     # assert T.cdf(0, 1) == 0.48740751602180743 # ~ 0.5
18 |   end
19 | 
20 |   test "return a random number from the distribution" do
21 |     assert is_float(T.rand(2))
22 |     # rands = for _ <- 0..10000, do: T.rand(3)
23 |     # assert T.rand(77) == 0.5
24 |   end
25 | 
26 |   # the percentile-point search is slow, hence the raised timeout
27 |   @tag timeout: 120_000
28 |   test "get the percentile point value" do
29 |     assert T.ppf(1).(0.1) == -3.0799999999999996
30 |   end
31 | end
32 | 


--------------------------------------------------------------------------------
/test/t_test_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule TTestTest do
 2 |   use ExUnit.Case, async: true
 3 |   doctest Statistics.Tests.TTest
 4 | 
 5 |   alias Statistics.Tests.TTest
 6 | 
 7 |   test "one sample t-test" do
 8 |     # a sample whose mean equals the population mean gives t = 0 and p = 1
 9 |     centred = TTest.one_sample([1, 2, 1, 2, 1, 2], 1.5)
10 |     assert centred == %{p: 1.0, t: 0.0}
11 | 
12 |     shifted = TTest.one_sample([4, 1, 2, 3, 1, 2, 3, 4], 2)
13 |     assert shifted == %{p: 0.23032680249555892, t: 1.2649110640673518}
14 |   end
15 | 
16 |   test "independent samples t-test" do
17 |     low = [1, 2, 1, 2, 1, 2]
18 | 
19 |     # the same values in a different order give t = 0 and p = 1
20 |     assert TTest.ind_samples(low, [2, 1, 2, 1, 2, 1]) == %{t: 0.0, p: 1.0}
21 | 
22 |     separated = TTest.ind_samples(low, [5, 6, 7, 6, 7, 5])
23 |     assert separated == %{p: 4.3244470028110745e-7, t: -11.512838683202965}
24 |   end
25 | end
26 | 


--------------------------------------------------------------------------------
/test/test_helper.exs:
--------------------------------------------------------------------------------
1 | ExUnit.start()
2 | 


--------------------------------------------------------------------------------