├── .formatter.exs ├── .github └── workflows │ └── elixir.yml ├── .gitignore ├── LICENSE ├── README.md ├── lib ├── math │ ├── functions.ex │ └── math.ex ├── statistics.ex └── statistics │ ├── distributions │ ├── beta.ex │ ├── binomial.ex │ ├── chisq.ex │ ├── exponential.ex │ ├── f.ex │ ├── hypergeometric.ex │ ├── normal.ex │ ├── poisson.ex │ └── t.ex │ └── tests │ └── t_test.ex ├── mix.exs ├── package.exs └── test ├── beta_distribution_test.exs ├── binomial_distribution_test.exs ├── chisq_distribution_test.exs ├── descriptive_test.exs ├── exponential_distribution_test.exs ├── f_distribution_test.exs ├── hypergeometric_distribution_test.exs ├── math_functions_test.exs ├── math_utils_test.exs ├── normal_distribution_test.exs ├── poisson_distribution_test.exs ├── t_distribution_test.exs ├── t_test_test.exs └── test_helper.exs /.formatter.exs: -------------------------------------------------------------------------------- 1 | # Used by "mix format" 2 | [ 3 | inputs: ["mix.exs", "{config,lib,test}/**/*.{ex,exs}"] 4 | ] 5 | -------------------------------------------------------------------------------- /.github/workflows/elixir.yml: -------------------------------------------------------------------------------- 1 | # This workflow uses actions that are not certified by GitHub. 2 | # They are provided by a third-party and are governed by 3 | # separate terms of service, privacy policy, and support 4 | # documentation. 
5 | 6 | name: Elixir CI 7 | 8 | on: 9 | push: 10 | branches: [ "master" ] 11 | pull_request: 12 | branches: [ "master" ] 13 | 14 | permissions: 15 | contents: read 16 | 17 | jobs: 18 | build: 19 | 20 | name: Build and test 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Elixir 26 | uses: erlef/setup-beam@61e01a43a562a89bfc54c7f9a378ff67b03e4a21 # v1.16.0 27 | with: 28 | elixir-version: '1.15.2' # [Required] Define the Elixir version 29 | otp-version: '26.0' # [Required] Define the Erlang/OTP version 30 | - name: Restore dependencies cache 31 | uses: actions/cache@v3 32 | with: 33 | path: deps 34 | key: ${{ runner.os }}-mix-${{ hashFiles('**/mix.lock') }} 35 | restore-keys: ${{ runner.os }}-mix- 36 | - name: Install dependencies 37 | run: mix deps.get 38 | - name: Run tests 39 | run: mix test 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /ebin 2 | /deps 3 | erl_crash.dump 4 | *.ez 5 | /_build 6 | /doc 7 | mix.lock 8 | TODO 9 | *.txt 10 | priv/ 11 | .idea/ 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014 Max Sharples 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Statistics 2 | 3 | [![Build Status](https://github.com/msharp/elixir-statistics/actions/workflows/elixir.yml/badge.svg)](https://github.com/msharp/elixir-statistics/actions/workflows/elixir.yml) 4 | [![hex.pm version](https://img.shields.io/hexpm/v/statistics.svg?style=flat)](https://hex.pm/packages/statistics) 5 | 6 | Statistics functions and distributions for [Elixir](https://github.com/elixir-lang/elixir). 7 | 8 | ## Usage 9 | 10 | Add Statistics as a dependency in your `mix.exs` file to install from [hex.pm](https://hex.pm). 11 | 12 | ```elixir 13 | def deps do 14 | [ 15 | { :statistics, "~> 0.6"} 16 | ] 17 | end 18 | ``` 19 | 20 | After you are done, run `mix deps.get` in your shell to fetch and compile Statistics. 21 | 22 | To try it out, start an interactive Elixir shell with `iex -S mix`. 23 | 24 | Get the median value from a list 25 | 26 | ``` 27 | iex> Statistics.median([1,2,3]) 28 | 2 29 | ``` 30 | 31 | Calculate the variance of a list of values. 32 | 33 | ``` 34 | iex> Statistics.variance([1,2,3,4]) 35 | 1.25 36 | ``` 37 | 38 | Or draw a random number from a Gaussian distribution with a mean of 1 and standard deviation of 2. 39 | 40 | ``` 41 | iex> Statistics.Distributions.Normal.rand(1, 2) 42 | 2.5998185179627384 43 | ``` 44 | 45 | ## Documentation 46 | 47 | Elixir has great documentation tools using `ex_doc`. 48 | 49 | The docs are hosted on [hexdocs.pm/statistics](http://hexdocs.pm/statistics/). 50 | 51 | ## Performance 52 | 53 | This is not a library to use if you need fast computation. 54 | 55 | Everything is implemented in Elixir. Many of the implementations use slow approximations, numerical function integration, or trial-and-error methods. 56 | 57 | There is much room for improvement. 
To make this library really fast (and precise), we would probably need to interface with existing C libraries. 58 | 59 | ## Contributing 60 | 61 | I will accept pull requests. 62 | 63 | If you want to contribute, please create a topic branch with tests and submit a pull request. 64 | 65 | ## License 66 | 67 | Apache 2 68 |
# -------------------------------------------------------------------------------- /lib/math/functions.ex: --------------------------------------------------------------------------------
defmodule Statistics.Math.Functions do
  @moduledoc """
  Special functions (Gamma, Beta, error function, incomplete Gamma, ...) and a
  Simpson's-rule integrator, built on top of `Statistics.Math`.
  """

  alias Statistics.Math

  # Lanczos parameters; the original source notes these are the coefficients
  # used by the GNU Scientific Library.
  @lanczos_g 7
  @lanczos_coefficients [
    0.99999999999980993,
    676.5203681218851,
    -1259.1392167224028,
    771.32342877765313,
    -176.61502916214059,
    12.507343278686905,
    -0.13857109526572012,
    9.9843695780195716e-6,
    1.5056327351493116e-7
  ]

  @doc """
  The Gamma function

  This implementation uses the [Lanczos approximation](http://en.wikipedia.org/wiki/Lanczos_approximation)

  ## Examples

      iex> Statistics.Math.Functions.gamma(0.5)
      1.7724538509055159

  """
  @spec gamma(number) :: number
  def gamma(x), do: gamma_lanczos(x)

  # Arguments below 0.5 are mapped onto the other branch through the
  # reflection formula: gamma(x) = pi / (sin(pi * x) * gamma(1 - x)).
  defp gamma_lanczos(x) when x < 0.5 do
    Math.pi() / (:math.sin(Math.pi() * x) * gamma_lanczos(1 - x))
  end

  defp gamma_lanczos(x) do
    z = x - 1
    [c0 | rest] = @lanczos_coefficients

    # Sum the partial fractions first and add the leading coefficient last,
    # so the floating point result is unchanged from the previous version.
    series =
      rest
      |> Enum.with_index(1)
      |> Enum.map(fn {c, i} -> c / (z + i) end)
      |> Enum.sum()

    a = c0 + series
    t = z + @lanczos_g + 0.5
    Math.sqrt(2 * Math.pi()) * Math.pow(t, z + 0.5) * Math.exp(-1 * t) * a
  end

  @doc """
  The Beta function

  Computed from the Gamma function as `gamma(x) * gamma(y) / gamma(x + y)`.

  ## Examples

      iex> Statistics.Math.Functions.beta(2, 0.5)
      1.3333333333333324

  """
  @spec beta(number, number) :: number
  def beta(x, y) do
    # from https://en.wikipedia.org/wiki/Beta_function#Properties
    gamma(x) * gamma(y) / gamma(x + y)
  end

  @doc """
  The 'error' function

  Formula 7.1.26 given in Abramowitz and Stegun:
  `1 - (a1*t + a2*t^2 + a3*t^3 + a4*t^4 + a5*t^5) * exp(-x^2)`
  with `t = 1 / (1 + p*x)`. The approximation is evaluated for `|x|` and the
  sign of `x` is re-applied to the result.
  """
  # The polynomial is evaluated with Horner's method:
  # a + b*x + c*x^2 + d*x^3 is computed as ((d*x + c)*x + b)*x + a,
  # which needs one multiplication per coefficient.
  @spec erf(number) :: number
  def erf(x) do
    # constants
    {a1, a2, a3, a4, a5} = {0.254829592, -0.284496736, 1.421413741, -1.453152027, 1.061405429}
    p = 0.3275911

    # Save the sign of x; the formula itself is evaluated for |x|
    sign = if x < 0, do: -1, else: 1
    x = abs(x)

    t = 1.0 / (1.0 + p * x)
    y = 1.0 - ((((a5 * t + a4) * t + a3) * t + a2) * t + a1) * t * Math.pow(Math.e(), -x * x)

    sign * y
  end

  @doc """
  The inverse 'error' function

  Evaluates a fixed rational expression in `x`.
  """
  # NOTE(review): the expression is applied directly to `x`, with no
  # transformation of the argument first. Confirm the source and the intended
  # domain of this approximation before relying on its accuracy.
  @spec inv_erf(number) :: number
  def inv_erf(x) do
    # constants
    {c0, c1, c2} = {2.515517, 0.802853, 0.010328}
    {d0, d1, d2} = {1.432788, 0.189269, 0.001308}
    # formula
    x - ((c2 * x + c1) * x + c0) / (((d2 * x + d1) * x + d0) * x + 1.0)
  end

  @doc """
  Lower incomplete Gamma function

  Evaluated as `x^a * exp(-x) * sum`, where the series starts at `1 / a` and
  each term is the previous one multiplied by `x / (a + n)`. Terms are added
  until one underflows to `0.0`.

  ## Examples

      iex> Statistics.Math.Functions.gammainc(1,1)
      0.63212055882855778

  """
  # ############################
  # this simple approach adapted from
  # http://www.dreamincode.net/forums/topic/12775-statistical-functions/
  #
  # there are alternate implementation strategies to try,
  # for examples, see:
  #
  #   : https://mail.python.org/pipermail/python-list/2001-April/092498.html
  #   : http://www.dreamincode.net/forums/topic/12775-statistical-functions/
  #   : http://www.crbond.com/math.htm
  #
  # ###########################
  @spec gammainc(number, number) :: number
  def gammainc(a, x) do
    Math.pow(x, a) * Math.exp(-x) * gammainc_sum(a, x, 1 / a, 0, 1)
  end

  # Stop once the current term has underflowed to zero.
  defp gammainc_sum(_, _, term, sum, _) when term == 0.0, do: sum

  defp gammainc_sum(a, x, term, sum, n) do
    gammainc_sum(a, x, term * (x / (a + n)), sum + term, n + 1)
  end

  @doc """
  Hypergeometric 2F1 function

  Sums the first 51 terms (`n = 0..50`) of the series

      gamma(c) / (gamma(a) * gamma(b)) *
        sum(gamma(a + n) * gamma(b + n) / gamma(c + n) * x^n / n!)

  WARNING: the implementation is incomplete, and should not be used

  """
  # from http://mhtlab.uwaterloo.ca/courses/me755/web_chap7.pdf
  @spec hyp2f1(number, number, number, number) :: number
  def hyp2f1(a, b, c, x) do
    # The prefactor divides by the *product* gamma(a) * gamma(b); without the
    # parentheses the expression evaluated (gamma(c) / gamma(a)) * gamma(b).
    prefactor = gamma(c) / (gamma(a) * gamma(b))
    prefactor * hyp2f1_cont(a, b, c, x, 0, 0)
  end

  defp hyp2f1_cont(_, _, _, _, n, acc) when n > 50, do: acc

  defp hyp2f1_cont(a, b, c, x, n, acc) do
    s = gamma(a + n) * gamma(b + n) / gamma(c + n)
    p = Math.pow(x, n) / Math.factorial(n)
    hyp2f1_cont(a, b, c, x, n + 1, acc + s * p)
  end

  @doc """
  Simpsons rule for numerical integration of a function

  Integrates `f` over `[a, b]` using `n` sub-intervals. `n` must be a
  positive, even integer; the result is not meaningful otherwise.

  see: http://en.wikipedia.org/wiki/Simpson's_rule

  ## Examples

      iex> Statistics.Math.Functions.simpson(fn x -> x*x*x end, 0, 20, 100000)
      40000.00000000011

  """
  @spec simpson(fun, number, number, number) :: number
  def simpson(f, a, b, n) do
    h = (b - a) / n
    endpoints = f.(a) + f.(b)
    # odd interior points carry weight 4, even interior points weight 2
    odd = simpson_weighted_sum(f, a, h, 1, n - 1, 4)
    even = simpson_weighted_sum(f, a, h, 2, n - 2, 2)

    (endpoints + odd + even) * h / 3
  end

  # Sums `weight * f(a + i * h)` for i = first, first + 2, ... up to `last`.
  # An empty index set (first > last, e.g. the even points when n == 2)
  # contributes nothing. Previously `2..0` was walked as a descending range
  # and the endpoints were counted a second time.
  defp simpson_weighted_sum(f, a, h, first, last, weight) when first <= last do
    first..last
    |> Stream.take_every(2)
    |> Enum.map(fn i -> weight * f.(a + i * h) end)
    |> Enum.sum()
  end

  defp simpson_weighted_sum(_f, _a, _h, _first, _last, _weight), do: 0
end
# -------------------------------------------------------------------------------- /lib/math/math.ex: --------------------------------------------------------------------------------
defmodule Statistics.Math do
  @moduledoc """
  Thin wrappers around Erlang's `:math`, `:erlang` and `:rand` modules, plus
  rounding and combinatorial helpers used by the rest of the library.
  """

  @e :math.exp(1)
  @pi :math.pi()

  @doc """
  Get square root

  return sqrt from Erlang

  ## Examples

      iex> Statistics.Math.sqrt(9)
      3.0
      iex> Statistics.Math.sqrt(99)
      9.9498743710662

  """
  @spec sqrt(number) :: number
  defdelegate sqrt(num), to: :math

  @doc """
  Get power from Erlang

  Wraps `:math.pow/2` with a few special cases:

    * any number raised to the integer `0` is the integer `1`
    * the integer `0` raised to a non-negative power is the integer `0`
    * a negative base with a float exponent returns `-(|num| ^ pow)`,
      because Erlang raises for negative numbers and non-integer powers

  ## Examples

      iex> Statistics.Math.pow(2,3)
      8.0
      iex> Statistics.Math.pow(9,9)
      387420489.0
      iex> Statistics.Math.pow(2,0)
      1
      iex> Statistics.Math.pow(-2, 1.5)
      -2.8284271247461903
      iex> Statistics.Math.pow(0, 5)
      0

  """
  @spec pow(number, number) :: number
  def pow(_, 0), do: 1
  def pow(0, pow) when pow >= 0, do: 0

  # Erlang doesn't like raising negative numbers to non-integer powers
  def pow(num, pow) when num < 0 and is_float(pow) do
    :math.pow(-num, pow) * -1
  end

  # otherwise let erlang do it
  def pow(num, pow), do: :math.pow(num, pow)

  @doc """
  The constant *e*

  ## Examples

      iex> Statistics.Math.e
      2.718281828459045

  """
  @spec e() :: number
  def e, do: @e

  @doc """
  The constant *pi*

  (returned from Erlang Math module)

  ## Examples

      iex> Statistics.Math.pi
      3.141592653589793

  """
  @spec pi() :: number
  def pi, do: @pi

  @doc """
  The natural log

  ( from Erlang Math module)

  ## Examples

      iex> Statistics.Math.ln(20)
      2.995732273553991
      iex> Statistics.Math.ln(200)
      5.298317366548036

  """
  @spec ln(number) :: number
  defdelegate ln(i), to: :math, as: :log

  @doc """
  Exponent function

  Raise *e* to given power

  ## Examples

      iex> Statistics.Math.exp(5.6)
      270.42640742615254

  """
  @spec exp(number) :: number
  defdelegate exp(x), to: :math

  @doc """
  Get a random number from erlang

  Delegates to `:rand.uniform/0`.
  """
  @spec rand() :: number
  defdelegate rand(), to: :rand, as: :uniform

  @doc """
  Round a decimal to a specific precision

  ## Examples

      iex> Statistics.Math.round(0.123456, 4)
      0.1235

  """
  @spec round(number, number) :: number
  def round(x, precision) do
    scale = pow(10, precision)
    :erlang.round(x * scale) / scale
  end

  @doc """
  Floor function

  Always returns a float.

  ## Examples

      iex> Statistics.Math.floor(3.999)
      3.0

  """
  @spec floor(number) :: number
  def floor(x) do
    truncated = :erlang.trunc(x) * 1.0
    # truncation rounds towards zero, so step down once for negative fractions
    if x - truncated >= 0, do: truncated, else: truncated - 1
  end

  @doc """
  Ceiling function

  Always returns a float.

  ## Examples

      iex> Statistics.Math.ceil(3.999)
      4.0

  """
  @spec ceil(number) :: number
  def ceil(x) do
    truncated = :erlang.trunc(x) * 1.0
    # truncation rounds towards zero, so step up once for positive fractions
    if x - truncated > 0, do: truncated + 1, else: truncated
  end

  @doc """
  Get the absolute value of a number

  ## Examples

      iex> Statistics.Math.abs(-4)
      4

  """
  @spec abs(number) :: number
  defdelegate abs(x), to: :erlang

  @doc """
  Factorial!

  `n` is expected to be a non-negative integer. Raises `ArithmeticError` for
  negative `n`.

  ## Examples

      iex> Statistics.Math.factorial(5)
      120

  """
  @spec factorial(non_neg_integer) :: non_neg_integer
  def factorial(n) when n < 0 do
    raise ArithmeticError, message: "Argument n must be a positive number"
  end

  def factorial(n) when n == 0 or n == 1, do: 1

  def factorial(n), do: do_factorial(to_int(n) - 1, n)

  # Multiplies `acc` by k, k - 1, ..., 1. Explicit recursion replaces the
  # former `(n - 1)..1` range, which relied on the deprecated implicit
  # negative step of `first..last` when first > last.
  defp do_factorial(k, acc) when k < 1, do: acc
  defp do_factorial(k, acc), do: do_factorial(k - 1, k * acc)

  @doc """
  Get the base integer from a float

  ## Examples

      iex> Statistics.Math.to_int(66.6666)
      66

  """
  @spec to_int(number) :: integer
  defdelegate to_int(f), to: :erlang, as: :trunc

  @doc """
  The number of k combinations of n

  Both arguments must be integers greater than zero. `k` may not be larger than `n`.
  Returns `0` when `k > n` or when either argument is negative.

  ## Examples

      iex> Statistics.Math.combination(10, 3)
      120

  """
  @spec combination(non_neg_integer, non_neg_integer) :: non_neg_integer
  def combination(n, k) when k > n, do: 0
  def combination(n, _) when n < 0, do: 0
  def combination(_, k) when k < 0, do: 0

  def combination(n, k) do
    :erlang.div(factorial(n), factorial(k) * factorial(n - k))
  end

  @doc """
  The number of k permutations of n

  Both arguments must be integers greater than zero. `k` may not be larger than `n`.
  Returns `0` when `k > n` or when `n` is negative.

  ## Examples

      iex> Statistics.Math.permutation(10, 3)
      720

  """
  @spec permutation(non_neg_integer, non_neg_integer) :: non_neg_integer
  def permutation(n, k) when k > n, do: 0
  def permutation(n, _) when n < 0, do: 0

  def permutation(n, k) do
    :erlang.div(factorial(n), factorial(n - k))
  end
end

# -------------------------------------------------------------------------------- /lib/statistics.ex: --------------------------------------------------------------------------------
defmodule Statistics do
  @moduledoc """
  Descriptive statistics functions
  """

  import Integer, only: [is_even: 1, is_odd: 1]

  alias Statistics.Math

  @doc """
  Sum the contents of a list

  Calls Enum.sum/1
  """
  @spec sum([number]) :: number
  def sum(list) when is_list(list), do: Enum.sum(list)

  @doc """
  Calculate the mean from a list of numbers

  Returns `nil` for an empty list.

  ## Examples

      iex> Statistics.mean([])
      nil
      iex> Statistics.mean([1,2,3])
      2.0

  """
  @spec mean([number]) :: float() | nil
  def mean(list) when is_list(list), do: do_mean(list, 0, 0)

  # single pass: accumulate the running total and the element count together
  defp do_mean([], 0, 0), do: nil
  defp do_mean([], total, count), do: total / count
  defp do_mean([x | xs], total, count), do: do_mean(xs, total + x, count + 1)

  @doc """
  Get the median value from a list.

  For an even number of elements the mean of the two middle values is returned.

  ## Examples

      iex> Statistics.median([])
      nil
      iex> Statistics.median([1,2,3])
      2
      iex> Statistics.median([1,2,3,4])
      2.5

  """
  @spec median([number]) :: number | nil
  def median([]), do: nil

  def median(list) when is_list(list) do
    sorted = Enum.sort(list)
    count = length(sorted)
    mid = div(count, 2)

    if rem(count, 2) == 1 do
      Enum.at(sorted, mid)
    else
      mean([Enum.at(sorted, mid), Enum.at(sorted, mid - 1)])
    end
  end

  @doc """
  Get the most frequently occurring value

  ## Examples

      iex> Statistics.mode([])
      nil
      iex> Statistics.mode([1,2,3,2,4,5,2,6,7,2,8,9])
      2

  """
  @spec mode([number]) :: number | nil
  def mode([]), do: nil

  def mode(list) when is_list(list) do
    counts = hist(list)
    highest = counts |> Map.values() |> Enum.max()

    # the first entry (in map iteration order) that reaches the highest count
    {value, _count} = Enum.find(counts, fn {_, count} -> count == highest end)
    value
  end

  @doc """
  Get a frequency count of the values in a list

  ## Examples

      iex> Statistics.hist([])
      nil
      iex> Statistics.hist([1,2,3,2,4,5,2,5,1,2,5,5])
      %{1 => 2, 2 => 4, 3 => 1, 4 => 1, 5 => 4}

  """
  @spec hist([number]) :: map | nil
  def hist([]), do: nil

  def hist(list) when is_list(list) do
    Enum.reduce(list, %{}, fn value, acc -> Map.update(acc, value, 1, &(&1 + 1)) end)
  end

  @doc """
  Get the minimum value from a list

      iex> Statistics.min([])
      nil
      iex> Statistics.min([1,2,3])
      1

  If a non-empty list is provided, it is a call to Enum.min/1
  """
  @spec min([number]) :: number | nil
  def min([]), do: nil
  def min(list), do: Enum.min(list)

  @doc """
  Get the maximum value from a list

      iex> Statistics.max([])
      nil
      iex> Statistics.max([1,2,3])
      3

  If a non-empty list is provided, it is a call to Enum.max/1
  """
  @spec max([number]) :: number | nil
  def max([]), do: nil
  def max(list), do: Enum.max(list)

  @doc """
  Get the quartile cutoff value from a list

  responds to only first and third quartile.

  ## Examples

      iex> Statistics.quartile([1,2,3,4,5,6,7,8,9],:first)
      3
      iex> Statistics.quartile([1,2,3,4,5,6,7,8,9],:third)
      7

  """
  # TODO change these to call `percentile/2`
  @spec quartile([number], :first | :third) :: number
  def quartile(list, :first) do
    {lower_half, _upper_half} = split(list)
    median(lower_half)
  end

  def quartile(list, :third) do
    {_lower_half, upper_half} = split(list)
    median(upper_half)
  end

  @doc """
  Get the nth percentile cutoff from a list

  `n` must be a number between 0 and 100 (inclusive); integer and float
  values are both accepted. Values between two observations are linearly
  interpolated.

  ## Examples

      iex> Statistics.percentile([], 50)
      nil
      iex> Statistics.percentile([1], 50)
      1
      iex> Statistics.percentile([1,2,3,4,5,6,7,8,9],80)
      7.4
      iex> Statistics.percentile([1,2,3,4,5,6,7,8,9],100)
      9

  """
  @spec percentile([number], number) :: number | nil
  def percentile([], _), do: nil
  def percentile([x], _), do: x
  def percentile(list, 0), do: min(list)
  def percentile(list, 100), do: max(list)

  def percentile(list, n) when is_list(list) and is_number(n) and n >= 0 and n <= 100 do
    sorted = Enum.sort(list)
    rank = n / 100.0 * (length(list) - 1)
    index = :erlang.trunc(rank)
    lower = Enum.at(sorted, index)

    case Enum.at(sorted, index + 1) do
      # rank sits exactly on the last element (n == 100.0): nothing to
      # interpolate towards, so the last element is the answer
      nil -> lower
      upper -> lower + (upper - lower) * (rank - index)
    end
  end

  @doc """
  Get range of data

  ## Examples

      iex> Statistics.range([1,2,3,4,5,6])
      5

  """
  @spec range([number]) :: number | nil
  def range([]), do: nil
  def range(list) when is_list(list), do: max(list) - min(list)

  @doc """
  Calculate the inter-quartile range

  ## Examples

      iex> Statistics.iqr([])
      nil
      iex> Statistics.iqr([1,2,3,4,5,6,7,8,9])
      4

  """
  @spec iqr([number]) :: number | nil
  def iqr([]), do: nil

  def iqr(list) when is_list(list) do
    {lower_half, upper_half} = split(list)
    median(upper_half) - median(lower_half)
  end

  @doc """
  Calculate variance from a list of numbers

  This is the population variance: the mean of the squared deviations from
  the mean, with no degrees-of-freedom correction.

  ## Examples

      iex> Statistics.variance([])
      nil
      iex> Statistics.variance([1,2,3,4])
      1.25
      iex> Statistics.variance([55,56,60,65,54,51,39])
      56.48979591836735

  """
  @spec variance([number]) :: number | nil
  def variance([]), do: nil

  def variance(list) when is_list(list) do
    list_mean = mean(list)

    list
    |> Enum.map(fn x -> (list_mean - x) * (list_mean - x) end)
    |> mean()
  end

  @doc """
  Calculate the standard deviation of a list

  The square root of `variance/1`.

  ## Examples

      iex> Statistics.stdev([])
      nil
      iex> Statistics.stdev([1,2])
      0.5

  """
  @spec stdev([number]) :: number | nil
  def stdev([]), do: nil
  def stdev(list), do: list |> variance() |> Math.sqrt()

  @doc """
  Calculate the trimmed mean of a list.

  Can specify cutoff values as a tuple, or simply choose the IQR min/max as the cutoffs

  ## Examples

      iex> Statistics.trimmed_mean([], :iqr)
      nil
      iex> Statistics.trimmed_mean([1,2,3], {1,3})
      2.0
      iex> Statistics.trimmed_mean([1,2,3,4,5,5,6,6,7,7,8,8,10,11,12,13,14,15], :iqr)
      7.3

  """
  @spec trimmed_mean([number], atom | tuple) :: number | nil
  def trimmed_mean([], _), do: nil

  def trimmed_mean(list, :iqr) do
    {lower_half, upper_half} = split(list)
    trimmed_mean(list, {median(lower_half), median(upper_half)})
  end

  def trimmed_mean(list, {low, high}) do
    list
    |> Enum.reject(fn x -> x < low or x > high end)
    |> mean()
  end

  @doc """
  Calculates the harmonic mean from a list

  Harmonic mean is the number of values divided by
  the sum of the reciprocal of all the values.

  ## Examples

      iex> Statistics.harmonic_mean([])
      nil
      iex> Statistics.harmonic_mean([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15])
      4.5204836768674568

  """
  @spec harmonic_mean([number]) :: number | nil
  def harmonic_mean([]), do: nil
  def harmonic_mean(list) when is_list(list), do: do_harmonic_mean(list, 0, 0)

  defp do_harmonic_mean([], reciprocal_sum, count), do: count / reciprocal_sum

  defp do_harmonic_mean([x | xs], reciprocal_sum, count) do
    do_harmonic_mean(xs, reciprocal_sum + 1 / x, count + 1)
  end

  @doc """
  Calculate the geometric mean of a list

  Geometric mean is the nth root of the product of n values

  ## Examples

      iex> Statistics.geometric_mean([])
      nil
      iex> Statistics.geometric_mean([1,2,3])
      1.8171205928321397

  """
  @spec geometric_mean([number]) :: number | nil
  def geometric_mean([]), do: nil
  def geometric_mean(list) when is_list(list), do: do_geometric_mean(list, 1, 0)

  defp do_geometric_mean([], product, count), do: Math.pow(product, 1 / count)

  defp do_geometric_mean([x | xs], product, count) do
    do_geometric_mean(xs, product * x, count + 1)
  end

  @doc """
  Calculates the nth moment about the mean for a sample.

  Generally used to calculate coefficients of skewness and kurtosis.
  Returns the n-th central moment as a float
  The denominator for the moment calculation is the number of
  observations, no degrees of freedom correction is done.

  ## Examples

      iex> Statistics.moment([1,2,3,4,5,6,7,8,9,8,7,6,5,4,3],3)
      -1.3440000000000025
      iex> Statistics.moment([], 2)
      nil

  """
  @spec moment([number], pos_integer) :: number | nil
  def moment(list, n \\ 1)
  # empty list has no moment
  def moment([], _), do: nil
  # By definition the first moment about the mean is 0.
  def moment(_, 1), do: 0.0
  # Otherwise
  def moment(list, n) when is_list(list) and is_number(n) do
    list_mean = mean(list)

    list
    |> Enum.map(&Math.pow(&1 - list_mean, n))
    |> mean()
  end

  @doc """
  Computes the skewness of a data set.

  For normally distributed data, the skewness should be about 0. A skewness
  value > 0 means that there is more weight in the right tail of the
  distribution.

  ## Examples

      iex> Statistics.skew([])
      nil
      iex> Statistics.skew([1,2,3,2,1])
      0.3436215967445454

  """
  @spec skew([number]) :: number | nil
  def skew([]), do: nil

  def skew(list) do
    m2 = moment(list, 2)
    m3 = moment(list, 3)
    m3 / Math.pow(m2, 1.5)
  end

  @doc """
  Computes the kurtosis (Fisher) of a list.

  Kurtosis is the fourth central moment divided by the square of the variance.
  The Fisher definition subtracts 3 from that ratio.

  ## Examples

      iex> Statistics.kurtosis([])
      nil
      iex> Statistics.kurtosis([1,2,3,2,1])
      -1.1530612244897964

  """
  @spec kurtosis([number]) :: number | nil
  def kurtosis([]), do: nil

  def kurtosis(list) do
    m2 = moment(list, 2)
    m4 = moment(list, 4)
    # pearson
    pearson = m4 / Math.pow(m2, 2.0)
    # fisher
    pearson - 3
  end

  @doc """
  Calculate a standard `z` score for each item in a list

  ## Examples

      iex> Statistics.zscore([3,2,3,4,5,6,5,4,3])
      [-0.7427813527082074, -1.5784103745049407, -0.7427813527082074,
      0.09284766908852597, 0.9284766908852594, 1.7641057126819928,
      0.9284766908852594, 0.09284766908852597, -0.7427813527082074]

  """
  @spec zscore([number]) :: list | nil
  def zscore(list) when is_list(list) do
    list_mean = mean(list)
    list_stdev = stdev(list)
    for x <- list, do: (x - list_mean) / list_stdev
  end

  @doc """
  Calculate the Pearson product-moment correlation coefficient of two lists.

  The two lists are presumed to represent matched pairs of observations, the `x` and `y` of a simple regression.
  Both lists must have the same length.

  ## Examples

      iex> Statistics.correlation([1,2,3,4], [1,3,5,6])
      0.9897782665572894

  """
  @spec correlation([number], [number]) :: number
  def correlation(x, y) when length(x) == length(y) do
    xmean = mean(x)
    ymean = mean(y)

    numer =
      Enum.zip(x, y)
      |> Enum.map(fn {xi, yi} -> (xi - xmean) * (yi - ymean) end)
      |> sum()

    denom_x =
      x
      |> Enum.map(fn xi -> (xi - xmean) * (xi - xmean) end)
      |> sum()

    denom_y =
      y
      |> Enum.map(fn yi -> (yi - ymean) * (yi - ymean) end)
      |> sum()

    numer / Math.sqrt(denom_x * denom_y)
  end

  @doc """
  Calculate the covariance of two lists.

  Covariance is a measure of how much two random variables change together.
  The two lists are presumed to represent matched pairs of observations, such as the `x` and `y` of a simple regression.
  Both lists must have the same length. The divisor is `n - 1`.

  ## Examples

      iex> Statistics.covariance([1,2,3,2,1], [1,4,5.2,7,99])
      -17.89

  """
  @spec covariance([number], [number]) :: number
  def covariance(x, y) when length(x) == length(y) do
    xmean = mean(x)
    ymean = mean(y)
    size = length(x)

    Enum.zip(x, y)
    |> Enum.map(fn {xi, yi} -> (xi - xmean) * (yi - ymean) end)
    |> Enum.map(fn term -> term / (size - 1) end)
    |> sum()
  end

  ## helpers and other flotsam

  # Split a list into two equal lists.
  # Needed for getting the quartiles.
  # For an odd number of elements the middle element ends up in both halves.
  defp split(list) when is_list(list) do
    do_split(Enum.sort(list), length(list))
  end

  defp do_split(sorted_list, count) when is_even(count) do
    half = div(count, 2)
    {Enum.take(sorted_list, half), Enum.drop(sorted_list, half)}
  end

  defp do_split(sorted_list, count) when is_odd(count) do
    half = div(count + 1, 2)
    {Enum.take(sorted_list, half), Enum.drop(sorted_list, half - 1)}
  end
end

# -------------------------------------------------------------------------------- /lib/statistics/distributions/beta.ex: --------------------------------------------------------------------------------
defmodule Statistics.Distributions.Beta do
  alias Statistics.Math
  alias Statistics.Math.Functions

  @moduledoc """
  The Beta distribution
  """

  @doc """
  The probability density function

  Returns a function of `x`. The density is `0.0` for `x <= 0` and for
  `x > 1`.

  ## Examples

      iex> Statistics.Distributions.Beta.pdf(1,100).(0.1)
      0.0029512665430652825

  """
  @spec pdf(number, number) :: fun
  def pdf(a, b) do
    # the normalising constant only depends on the shape parameters
    bab = Functions.beta(a, b)

    fn x ->
      cond do
        x <= 0.0 ->
          0.0

        # Above 1 the formula would raise a negative base to a power and
        # could return a negative "density".
        x > 1.0 ->
          0.0

        true ->
          Math.pow(x, a - 1) * Math.pow(1 - x, b - 1) / bab
      end
    end
  end

  @doc """
  The cumulative density function

  Integrates the density from 0 to `x` with Simpson's rule (10000 intervals).

  ## Examples

      iex> Statistics.Distributions.Beta.cdf(1,100).(0.1)
      0.9996401052677814

  """
  @spec cdf(number, number) :: fun
  def cdf(a, b) do
    # build the density closure once instead of on every evaluation
    density = pdf(a, b)

    fn x ->
      Functions.simpson(density, 0, x, 10000)
    end
  end

  @doc """
  The percentile-point function

  Found by trial and error against `cdf/2`, refining the guess one decimal
  place at a time.

  ## Examples

      iex> Statistics.Distributions.Beta.ppf(1,100).(0.1)
      0.001053089271799999

  """
  @spec ppf(number, number) :: fun
  def ppf(a, b) do
    cumulative = cdf(a, b)

    fn x ->
      ppf_tande(cumulative, x)
    end
  end

  defp ppf_tande(cdf, x) do
    ppf_tande(cdf, x, 0.0, 14, 0)
  end

  # the requested number of decimal places has been reached
  defp ppf_tande(_, _, guess, precision, precision) do
    guess
  end

  defp ppf_tande(cdf, x, guess, precision, current_precision) do
    # add 1/10**precision'th of the max value to the min
    new_guess = guess + 1 / Math.pow(10, current_precision)

    if cdf.(new_guess) < x do
      # still below the target probability: keep stepping at this precision
      ppf_tande(cdf, x, new_guess, precision, current_precision)
    else
      # overshot: keep the previous guess and refine with a smaller step
      ppf_tande(cdf, x, guess, precision, current_precision + 1)
    end
  end

  @doc """
  Draw a random number from a Beta distribution

  The example below is illustrative only; the value changes on every call.

  ## Examples

      iex> Statistics.Distributions.Beta.rand(1,100)
      0.005922672626035741

  """
  @spec rand(number, number) :: number
  def rand(a, b), do: rand(pdf(a, b))

  # NOTE(review): the density is compared against a U(0,1) draw, so any
  # density above 1 is accepted with certainty. This looks like it biases the
  # samples when the pdf exceeds 1 -- confirm whether an envelope constant is
  # needed.
  defp rand(rpdf) do
    # beta only exists between 0 and 1
    x = Math.rand()

    if rpdf.(x) > Math.rand() do
      x
    else
      # keep trying
      rand(rpdf)
    end
  end
end
--------------------------------------------------------------------------------
/lib/statistics/distributions/binomial.ex:
--------------------------------------------------------------------------------
defmodule Statistics.Distributions.Binomial do
  alias Statistics.Math

  @moduledoc """
  Binomial distribution.

  This models the expected outcome of a number
  of binary trials, each with known probability,
  (often called a Bernoulli trial)
  """

  @doc """
  The probability mass function.

  Note that calling the mass function with a `Float` will return `nil` because
  this is a discrete probability distribution which only includes integer values.

  ## Examples

      iex> Statistics.Distributions.Binomial.pmf(4, 0.5).(2)
      0.375
      iex> Statistics.Distributions.Binomial.pmf(4, 0.5).(0.2)
      nil

  """
  @spec pmf(non_neg_integer, number) :: fun
  def pmf(n, p) do
    fn k ->
      cond do
        k < 0.0 ->
          0.0

        n < k ->
          0.0

        k != Math.to_int(k) ->
          nil

        true ->
          Math.combination(n, k) * Math.pow(p, k) * Math.pow(1 - p, n - k)
      end
    end
  end

  @doc """
  The cumulative density function

  Sums the mass of every outcome from 0 up to the floor of `k`.
  Returns `0.0` for negative `k`.

  ## Examples

      iex> Statistics.Distributions.Binomial.cdf(4, 0.5).(2)
      0.6875

  """
  @spec cdf(non_neg_integer, number) :: fun
  def cdf(n, p) do
    fn k ->
      if k < 0 do
        # `0..-1` would be a descending range and visit i = 0 and i = -1
        0.0
      else
        0..Math.to_int(Math.floor(k))
        |> Enum.map(fn i -> Math.combination(n, i) * Math.pow(p, i) * Math.pow(1 - p, n - i) end)
        |> Enum.sum()
      end
    end
  end

  @doc """
  The percentile-point function

  Returns the smallest number of successes whose cumulative
  probability is at least `x`.

  ## Examples

      iex> Statistics.Distributions.Binomial.ppf(10, 0.5).(0.5)
      5

  """
  @spec ppf(non_neg_integer, number) :: fun
  def ppf(n, p) do
    fn x ->
      ppf_tande(x, n, p, cdf(n, p), 0)
    end
  end

  # Trial-and-error search: walk the integer guess `g` upward from 0
  # until the CDF reaches the requested probability.
  defp ppf_tande(x, n, p, npcdf, g) do
    if x > npcdf.(g) do
      ppf_tande(x, n, p, npcdf, g + 1)
    else
      g
    end
  end

  @doc """
  Draw a random number from a binomial distribution

  Uses the [rejection sampling method](https://en.wikipedia.org/wiki/Rejection_sampling)
  and returns a whole number as a `Float`.

  ## Examples

      iex> Statistics.Distributions.Binomial.rand(10, 0.5)
      5.0

  """
  @spec rand(non_neg_integer, number) :: float
  def rand(n, p), do: rand(n, p, pmf(n, p))

  defp rand(n, p, rpmf) do
    # Propose an integer outcome uniformly from 0..n.
    # The pmf returns `nil` for non-integer floats, and `nil > number` is
    # always true under Erlang term ordering, so evaluating it at the raw
    # float would accept every proposal and make the result uniform.
    # Assumes Math.rand() is uniform on [0, 1) - TODO confirm; a draw of
    # exactly 1.0 would give k = n + 1, whose mass is 0.0 and is rejected.
    k = trunc(Math.rand() * (n + 1))

    if rpmf.(k) > Math.rand() do
      # keep returning a float, as documented
      k * 1.0
    else
      # keep trying
      rand(n, p, rpmf)
    end
  end
end

--------------------------------------------------------------------------------
/lib/statistics/distributions/chisq.ex:
--------------------------------------------------------------------------------
defmodule Statistics.Distributions.Chisq do
  alias Statistics.Math
  alias Statistics.Math.Functions

  @moduledoc """
  Chi square distribution.

  Takes a *degrees of freedom* parameter.
9 | """ 10 | 11 | @doc """ 12 | The probability density function 13 | 14 | ## Examples 15 | 16 | iex> Statistics.Distributions.Chisq.pdf(1).(2) 17 | 0.10377687435514868 18 | 19 | """ 20 | @spec pdf(non_neg_integer) :: fun 21 | def pdf(df) do 22 | hdf = df / 2 23 | g = Math.pow(2, hdf) * Functions.gamma(hdf) 24 | 25 | fn x -> 1 / g * Math.pow(x, hdf - 1) * Math.exp(-1 * x / 2) end 26 | end 27 | 28 | @doc """ 29 | The cumulative density function 30 | 31 | ## Examples 32 | 33 | iex> Statistics.Distributions.Chisq.cdf(2).(2) 34 | 0.6321205588285578 35 | 36 | """ 37 | @spec cdf(non_neg_integer) :: fun 38 | def cdf(df) do 39 | hdf = df / 2.0 40 | g = Functions.gamma(hdf) 41 | 42 | fn x -> 43 | b = Functions.gammainc(hdf, x / 2.0) 44 | b / g 45 | end 46 | end 47 | 48 | @doc """ 49 | The percentile-point function 50 | 51 | ## Examples 52 | 53 | iex> Statistics.Distributions.Chisq.ppf(1).(0.95) 54 | 3.841458820694101 55 | 56 | """ 57 | @spec ppf(non_neg_integer) :: fun 58 | def ppf(df) do 59 | fn x -> 60 | ppf_tande(x, cdf(df)) 61 | end 62 | end 63 | 64 | # trial-and-error method which refines guesses 65 | # to arbitrary number of decimal places 66 | defp ppf_tande(x, tcdf, precision \\ 14) do 67 | ppf_tande(x, tcdf, 0, precision + 2, 0) 68 | end 69 | 70 | defp ppf_tande(_, _, g, precision, precision) do 71 | g 72 | end 73 | 74 | defp ppf_tande(x, tcdf, g, precision, p) do 75 | increment = 100 / Math.pow(10, p) 76 | guess = g + increment 77 | 78 | if x < tcdf.(guess) do 79 | ppf_tande(x, tcdf, g, precision, p + 1) 80 | else 81 | ppf_tande(x, tcdf, guess, precision, p) 82 | end 83 | end 84 | 85 | @doc """ 86 | Draw a random number from a t distribution with specified degrees of freedom 87 | 88 | Uses the [rejection sampling method](https://en.wikipedia.org/wiki/Rejection_sampling) 89 | 90 | ## Examples 91 | 92 | iex> Statistics.Distributions.Chisq.rand(2) 93 | 1.232433646523534767 94 | 95 | """ 96 | @spec rand(non_neg_integer) :: number 97 | def rand(df), do: rand(df, 
cdf(df)) 98 | 99 | defp rand(df, rcdf) do 100 | x = Math.rand() * 100 101 | 102 | if rcdf.(x) > Math.rand() do 103 | x 104 | else 105 | # keep trying 106 | rand(df, rcdf) 107 | end 108 | end 109 | end 110 | -------------------------------------------------------------------------------- /lib/statistics/distributions/exponential.ex: -------------------------------------------------------------------------------- 1 | defmodule Statistics.Distributions.Exponential do 2 | @moduledoc """ 3 | Exponential distribution. 4 | 5 | `lambda` is the rate parameter and must be greater than zero. 6 | """ 7 | 8 | alias Statistics.Math 9 | 10 | @doc """ 11 | The probability density function 12 | 13 | ## Examples 14 | 15 | iex> Statistics.Distributions.Exponential.pdf().(1) 16 | 0.36787944117144233 17 | 18 | """ 19 | @spec pdf() :: fun 20 | @spec pdf(number) :: fun 21 | def pdf() do 22 | pdf(1) 23 | end 24 | 25 | def pdf(lambda) do 26 | fn x -> 27 | cond do 28 | x < 0 -> 29 | 0 30 | 31 | lambda <= 0 -> 32 | :nan 33 | 34 | true -> 35 | lambda * Math.exp(-lambda * x) 36 | end 37 | end 38 | end 39 | 40 | @doc """ 41 | The cumulative density function 42 | 43 | ## Examples 44 | 45 | iex> Statistics.Distributions.Exponential.cdf().(1) 46 | 0.6321205588285577 47 | 48 | """ 49 | @spec cdf() :: fun 50 | @spec cdf(number) :: fun 51 | def cdf() do 52 | cdf(1) 53 | end 54 | 55 | def cdf(lambda) do 56 | fn x -> 57 | cond do 58 | x < 0 -> 59 | 0 60 | 61 | lambda <= 0 -> 62 | :nan 63 | 64 | true -> 65 | 1 - Math.exp(-lambda * x) 66 | end 67 | end 68 | end 69 | 70 | @doc """ 71 | The percentile-point function 72 | 73 | ## Examples 74 | 75 | iex> Statistics.Distributions.Exponential.ppf().(0.1) 76 | 0.10536051565782628 77 | 78 | """ 79 | @spec ppf() :: fun 80 | @spec ppf(number) :: fun 81 | def ppf() do 82 | ppf(1) 83 | end 84 | 85 | def ppf(lambda) do 86 | fn x -> 87 | cond do 88 | x == 1 -> 89 | :inf 90 | 91 | x < 0 or x > 1 or lambda < 0 -> 92 | :nan 93 | 94 | true -> 95 | -1 * Math.ln(1 - x) / 
lambda 96 | end 97 | end 98 | end 99 | 100 | @doc """ 101 | Draw a random variate from the distribution with specified lambda 102 | 103 | Uses the closed-form inverse CDF (PPF) evaluated with uniform number between 0.0 and 1.0 104 | 105 | ## Examples 106 | 107 | iex> Statistics.Distributions.Exponential.rand() 108 | 0.145709384787 109 | 110 | """ 111 | @spec rand() :: number 112 | @spec rand(number) :: number 113 | def rand() do 114 | rand(1) 115 | end 116 | 117 | def rand(lambda) do 118 | ppf(lambda).(Math.rand()) 119 | end 120 | end 121 | -------------------------------------------------------------------------------- /lib/statistics/distributions/f.ex: -------------------------------------------------------------------------------- 1 | defmodule Statistics.Distributions.F do 2 | alias Statistics.Math 3 | alias Statistics.Math.Functions 4 | alias Statistics.Distributions.Beta 5 | 6 | @moduledoc """ 7 | The F distribution 8 | 9 | Note that `ppf/2` and `rand/2` here are very slow. 10 | """ 11 | 12 | @doc """ 13 | The probability density function 14 | 15 | ## Examples 16 | 17 | iex> Statistics.Distributions.F.pdf(1,1).(1) 18 | 0.15915494309189537 19 | 20 | """ 21 | @spec pdf(number, number) :: fun 22 | def pdf(d1, d2) do 23 | powa = Math.pow(d2, d2) 24 | cfac = Functions.beta(d1 / 2, d2 / 2) 25 | 26 | fn x -> 27 | # create components 28 | a = Math.pow(d1 * x, d1) * powa 29 | b = Math.pow(d1 * x + d2, d1 + d2) 30 | c = x * cfac 31 | # for the equation 32 | Math.sqrt(a / b) / c 33 | end 34 | end 35 | 36 | @doc """ 37 | The cumulative density function 38 | 39 | ## Examples 40 | 41 | iex> Statistics.Distributions.F.cdf(1,1).(1) 42 | 0.4971668763845647 43 | 44 | NOTE this is rather imprecise owing to the use 45 | of numerical integration of `Beta.pdf/2` to 46 | approximate the regularised incomplete beta function 47 | """ 48 | # NOTE the cdf is defined in terms of 49 | # the regularised incomplete Beta function 50 | # which is the CDF of the Beta distribution 51 | @spec 
cdf(number, number) :: fun 52 | def cdf(d1, d2) do 53 | bcdf = Beta.cdf(d1 / 2, d2 / 2) 54 | 55 | fn x -> 56 | xx = d1 * x / (d1 * x + d2) 57 | bcdf.(xx) 58 | end 59 | end 60 | 61 | @doc """ 62 | The percentile-point function 63 | 64 | ## Examples 65 | 66 | iex> Statistics.Distributions.F.ppf(1,1).(1) 67 | 1.0180414899099999 68 | 69 | """ 70 | @spec ppf(number, number) :: fun 71 | def ppf(d1, d2) do 72 | fn x -> 73 | ppf_tande(cdf(d1, d2), x) 74 | end 75 | end 76 | 77 | # trial-and-error method which refines guesses 78 | defp ppf_tande(cdf, x) do 79 | ppf_tande(cdf, x, 0.0, 14, 0) 80 | end 81 | 82 | defp ppf_tande(_, _, guess, precision, current_precision) when current_precision >= precision do 83 | guess 84 | end 85 | 86 | defp ppf_tande(cdf, x, guess, precision, current_precision) do 87 | # add 1/10**precision'th of the max value to the min 88 | new_guess = guess + 100_000 / Math.pow(10, current_precision) 89 | cg = cdf.(new_guess) 90 | # if it's less than the PPF we want, do it again 91 | if cg < x do 92 | ppf_tande(cdf, x, new_guess, precision, current_precision + 0.1) 93 | else 94 | # otherwise (it's greater), increase the current_precision 95 | # and recurse with original guess 96 | ppf_tande(cdf, x, guess, precision, current_precision + 1) 97 | end 98 | end 99 | 100 | @doc """ 101 | Draw a random number from an F distribution 102 | """ 103 | @spec rand(number, number) :: number 104 | def rand(d1, d2) do 105 | ceil = ppf(d1, d2).(0.999) 106 | do_rand(pdf(d1, d2), ceil) 107 | end 108 | 109 | defp do_rand(pdf, ceil) do 110 | x = Math.rand() * ceil 111 | 112 | if pdf.(x) > Math.rand() do 113 | x 114 | else 115 | # keep trying 116 | do_rand(pdf, ceil) 117 | end 118 | end 119 | end 120 | -------------------------------------------------------------------------------- /lib/statistics/distributions/hypergeometric.ex: -------------------------------------------------------------------------------- 1 | defmodule Statistics.Distributions.Hypergeometric do 2 | 
@moduledoc """ 3 | Hypergeometric distribution. 4 | 5 | It models the probability that an n numbers of trials 6 | result in exactly k successes, with a population of pn items, 7 | where pk are considered as successes. 8 | """ 9 | 10 | alias Statistics.Math 11 | 12 | @doc """ 13 | The probability mass function 14 | ## Examples 15 | iex> Statistics.Distributions.Hypergeometric.pmf(50, 5, 10).(4) 16 | 0.003964583058015066 17 | """ 18 | @spec pmf(non_neg_integer, non_neg_integer, non_neg_integer) :: fun 19 | def pmf(pn, pk, n) do 20 | combos = Math.combination(pn, n) 21 | 22 | fn k -> 23 | cond do 24 | n < k -> 25 | 0.0 26 | 27 | pn < n -> 28 | 0.0 29 | 30 | pn == pk && n != k -> 31 | 0.0 32 | 33 | pn == pk -> 34 | 1.0 35 | 36 | true -> 37 | xk = Math.to_int(k) 38 | Math.combination(pk, xk) * Math.combination(pn - pk, n - xk) / combos 39 | end 40 | end 41 | end 42 | 43 | @doc """ 44 | The cumulative density function 45 | ## Examples 46 | iex> Statistics.Distributions.Hypergeometric.cdf(52, 5, 13).(2) 47 | 0.9072328931572629 48 | """ 49 | @spec cdf(non_neg_integer, non_neg_integer, non_neg_integer) :: fun 50 | def cdf(pn, pk, n) do 51 | cpmf = pmf(pn, pk, n) 52 | 53 | fn k -> 54 | 0..Math.to_int(Math.floor(k)) 55 | |> Enum.to_list() 56 | |> Enum.map(fn i -> cpmf.(i) end) 57 | |> Enum.sum() 58 | end 59 | end 60 | 61 | @doc """ 62 | The percentile-point function 63 | ## Examples 64 | iex> Statistics.Distributions.Hypergeometric.ppf(80, 20, 50).(0.1) 65 | 10 66 | """ 67 | @spec ppf(non_neg_integer, non_neg_integer, non_neg_integer) :: fun 68 | def ppf(pn, pk, n) do 69 | fn x -> 70 | ppf_tande(x, cdf(pn, pk, n), 0) 71 | end 72 | end 73 | 74 | # trial-and-error method which refines guesses 75 | # to arbitrary number of decimal places 76 | 77 | defp ppf_tande(x, tcdf, guess) do 78 | g_cdf = tcdf.(guess) 79 | 80 | cond do 81 | x > g_cdf -> 82 | ppf_tande(x, tcdf, guess + 1) 83 | 84 | x <= g_cdf -> 85 | guess 86 | end 87 | end 88 | 89 | @doc """ 90 | Draw a random number from 
hypergeometric distribution 91 | """ 92 | @spec rand(non_neg_integer, non_neg_integer, non_neg_integer) :: non_neg_integer 93 | def rand(pn, pk, n), do: rand(pk, pmf(pn, pk, n)) 94 | 95 | defp rand(pk, rpmf) do 96 | x = Math.floor(Math.rand() * pk) 97 | 98 | if rpmf.(x) > Math.rand() do 99 | Float.round(x) 100 | else 101 | # keep trying 102 | rand(pk, rpmf) 103 | end 104 | end 105 | end 106 | -------------------------------------------------------------------------------- /lib/statistics/distributions/normal.ex: -------------------------------------------------------------------------------- 1 | defmodule Statistics.Distributions.Normal do 2 | @moduledoc """ 3 | The normal, or gaussian, distribution 4 | 5 | When invoking the distibution functions without parameters, 6 | a distribution with mean of 0 and standard deviation of 1 is assumed. 7 | """ 8 | 9 | alias Statistics.Math 10 | alias Statistics.Math.Functions 11 | 12 | @doc """ 13 | Probability density function 14 | 15 | Roughly the expectation of a given value in the distribution 16 | 17 | ## Examples 18 | 19 | iex> Statistics.Distributions.Normal.pdf().(0) 20 | 0.3989422804014327 21 | iex> Statistics.Distributions.Normal.pdf(0.2, 1).(1.3) 22 | 0.21785217703255055 23 | 24 | """ 25 | @spec pdf :: fun 26 | def pdf do 27 | pdf(0, 1) 28 | end 29 | 30 | @spec pdf(number, number) :: fun 31 | def pdf(mu, sigma) do 32 | fn x -> 33 | numexp = Math.pow(x - mu, 2) / (2 * Math.pow(sigma, 2)) 34 | denom = sigma * Math.sqrt(2 * Math.pi()) 35 | numer = Math.pow(Math.e(), numexp * -1) 36 | numer / denom 37 | end 38 | end 39 | 40 | @doc """ 41 | The cumulative density function 42 | 43 | The probability that a value lies below `x` 44 | 45 | Cumulative gives a probability that a statistic 46 | is less than Z. This equates to the area of the distribution below Z. 47 | e.g: Pr(Z = 0.69) = 0.7549. This value is usually given in Z tables. 
48 | 49 | ## Examples 50 | 51 | iex> Statistics.Distributions.Normal.cdf().(2) 52 | 0.9772499371127437 53 | iex> Statistics.Distributions.Normal.cdf(0,1).(0) 54 | 0.5000000005 55 | 56 | """ 57 | @spec cdf :: fun 58 | def cdf() do 59 | cdf(0, 1) 60 | end 61 | 62 | @spec cdf(number, number) :: fun 63 | def cdf(mu, sigma) do 64 | denom = sigma * Math.sqrt(2) 65 | 66 | fn x -> 67 | 0.5 * (1.0 + Functions.erf((x - mu) / denom)) 68 | end 69 | end 70 | 71 | @doc """ 72 | The percentile-point function 73 | 74 | Get the maximum point which lies below the given probability. 75 | This is the inverse of the cdf 76 | 77 | ## Examples 78 | 79 | iex> Statistics.Distributions.Normal.ppf().(0.025) 80 | -1.96039491692534 81 | iex> Statistics.Distributions.Normal.ppf(7, 2.1).(0.25) 82 | 5.584202805909036 83 | 84 | """ 85 | @spec ppf :: fun 86 | def ppf() do 87 | ppf(0, 1) 88 | end 89 | 90 | @spec ppf(number, number) :: fun 91 | def ppf(mu, sigma) do 92 | res = fn p -> 93 | mu + p * sigma 94 | end 95 | 96 | fn x -> 97 | cond do 98 | x < 0.5 -> 99 | res.(-Functions.inv_erf(Math.sqrt(-2.0 * Math.ln(x)))) 100 | 101 | x >= 0.5 -> 102 | res.(Functions.inv_erf(Math.sqrt(-2.0 * Math.ln(1 - x)))) 103 | end 104 | end 105 | end 106 | 107 | @doc """ 108 | Draw a random number from a normal distribution 109 | 110 | `rnd/0` will return a random number from a normal distribution 111 | with a mean of 0 and a standard deviation of 1 112 | 113 | `rnd/2` allows you to provide the mean and standard deviation 114 | parameters of the distribution from which the random number is drawn 115 | 116 | Uses the [rejection sampling method](https://en.wikipedia.org/wiki/Rejection_sampling) 117 | 118 | ## Examples 119 | 120 | iex> Statistics.Distributions.Normal.rand() 121 | 1.5990817245679434 122 | iex> Statistics.Distributions.Normal.rand(22, 2.3) 123 | 23.900248900049736 124 | 125 | """ 126 | @spec rand() :: number 127 | def rand do 128 | rand(0, 1) 129 | end 130 | 131 | @spec rand(number, number) :: number 132 
| def rand(mu, sigma), do: rand(mu, sigma, pdf(0, 1)) 133 | 134 | defp rand(mu, sigma, rpdf) do 135 | # Note: an alternate method exists and may be better 136 | # Inverse transform sampling - https://en.wikipedia.org/wiki/Inverse_transform_sampling 137 | # ---- 138 | # Generate a random number between -10,+10 139 | # (probability of 10 ocurring in a Normal(0,1) distribution is 140 | # too small to calculate with the precision available to us) 141 | x = Math.rand() * 20 - 10 142 | 143 | cond do 144 | rpdf.(x) > Math.rand() -> 145 | # transpose to specified distribution 146 | mu - x * sigma 147 | 148 | true -> 149 | # keep trying 150 | rand(mu, sigma, rpdf) 151 | end 152 | end 153 | end 154 | -------------------------------------------------------------------------------- /lib/statistics/distributions/poisson.ex: -------------------------------------------------------------------------------- 1 | defmodule Statistics.Distributions.Poisson do 2 | @moduledoc """ 3 | The Poisson distribution is a discrete probablility distribution. 4 | 5 | It models the probability of a given number of events occurring 6 | in a fixed interval if the events occur with a known average rate 7 | and are independent of the previous event. 
8 | 9 | """ 10 | 11 | alias Statistics.Math 12 | 13 | @doc """ 14 | Probability mass function 15 | 16 | ## Examples 17 | 18 | iex> Statistics.Distributions.Poisson.pmf(1).(1) 19 | 0.36787944117144233 20 | 21 | """ 22 | @spec pmf(number) :: fun 23 | def pmf(lambda) do 24 | nexp = Math.exp(-lambda) 25 | 26 | fn k -> 27 | Math.pow(lambda, k) / Math.factorial(k) * nexp 28 | end 29 | end 30 | 31 | @doc """ 32 | Get the probability that a value lies below `k` 33 | 34 | ## Examples 35 | 36 | iex> Statistics.Distributions.Poisson.cdf(1).(1) 37 | 0.73575888234288467 38 | 39 | """ 40 | @spec cdf(number) :: fun 41 | def cdf(lambda) do 42 | nexp = Math.exp(-1 * lambda) 43 | 44 | fn k -> 45 | s = 46 | Enum.map(0..Math.to_int(k), fn x -> Math.pow(lambda, x) / Math.factorial(x) end) 47 | |> Enum.sum() 48 | 49 | nexp * s 50 | end 51 | end 52 | 53 | @doc """ 54 | The percentile-point function 55 | 56 | Get the maximum point which lies below the given probability. 57 | This is the inverse of the cdf and will take only positive integer values 58 | (but returns a float) 59 | 60 | ## Examples 61 | 62 | iex> Statistics.Distributions.Poisson.ppf(1).(0.95) 63 | 3.0 64 | 65 | """ 66 | @spec ppf(number) :: fun 67 | def ppf(lambda) do 68 | lcdf = cdf(lambda) 69 | 70 | fn x -> 71 | ppf_tande(x, lcdf, 0.0) 72 | end 73 | end 74 | 75 | # the trusty trial-and-error method 76 | defp ppf_tande(x, lcdf, guess) do 77 | if x > lcdf.(guess) do 78 | ppf_tande(x, lcdf, guess + 1) 79 | else 80 | guess 81 | end 82 | end 83 | 84 | @doc """ 85 | Draw a random number from this distribution 86 | 87 | This is a discrete distribution and the values it can take are positive integers. 
88 | 89 | ## Examples 90 | 91 | iex> Statistics.Distributions.Poisson.rand(1) 92 | 1.0 93 | 94 | """ 95 | @spec rand(number) :: number 96 | def rand(lambda), do: rand(lambda, pmf(lambda)) 97 | 98 | defp rand(lambda, lpmf) do 99 | x = (Math.rand() * 100 + lambda) |> Math.floor() 100 | 101 | if lpmf.(x) > Math.rand() do 102 | x 103 | else 104 | # keep trying 105 | rand(lambda, lpmf) 106 | end 107 | end 108 | end 109 | -------------------------------------------------------------------------------- /lib/statistics/distributions/t.ex: -------------------------------------------------------------------------------- 1 | defmodule Statistics.Distributions.T do 2 | alias Statistics.Math 3 | alias Statistics.Math.Functions 4 | 5 | @moduledoc """ 6 | Student's t distribution. 7 | 8 | This distribution is always centered around 0.0 and allows a *degrees of freedom* parameter. 9 | """ 10 | 11 | @doc """ 12 | The probability density function 13 | 14 | ## Examples 15 | 16 | iex> Statistics.Distributions.T.pdf(3).(0) 17 | 0.3675525969478612 18 | iex> Statistics.Distributions.T.pdf(1).(3.2) 19 | 0.028319384891796327 20 | 21 | """ 22 | @spec pdf(number) :: fun 23 | def pdf(df) do 24 | fac = Functions.gamma((df + 1) / 2) / (Math.sqrt(df * Math.pi()) * Functions.gamma(df / 2)) 25 | exp = (df + 1) / 2 * -1 26 | 27 | fn x -> fac * Math.pow(1 + x * x / df, exp) end 28 | end 29 | 30 | @doc """ 31 | The cumulative density function 32 | 33 | NOTE: this currently uses the very slow Simpson's Rule to execute 34 | a numerical integration of the `pdf` function to approximate 35 | the CDF. This leads to a trade-off between precision and speed. 36 | 37 | A robust implementation of the 2F1 hypergeometric function is 38 | required to properly calculate the CDF of the t distribution. 
39 | 40 | ## Examples 41 | 42 | iex> Statistics.Distributions.T.cdf(3).(0) 43 | 0.4909182507070275 44 | 45 | """ 46 | @spec cdf(number) :: fun 47 | def cdf(df) do 48 | cpdf = pdf(df) 49 | fn x -> Functions.simpson(cpdf, -10000, x, 10000) end 50 | end 51 | 52 | # when a robust hyp2F1 materialises, use this implementation 53 | # defp cdf_hyp2f1(x, df) do 54 | # p1 = 0.5 + x * Functions.gamma((df+1)/2) 55 | # p2n = Math.hyp2f1(0.5, ((df+1)/2), 1.5, -1*Math.pow(x,2)/df) 56 | # p2d = Math.sqrt(Math.pi*df) * Functions.gamma(df/2) 57 | # p1 * (p2n / p2d) 58 | # end 59 | 60 | @doc """ 61 | The percentile-point function 62 | 63 | NOTE: this is very slow due to the current implementation of the CDF 64 | 65 | """ 66 | @spec ppf(number) :: fun 67 | def ppf(df) do 68 | fn x -> 69 | ppf_tande(x, cdf(df), 4) 70 | end 71 | end 72 | 73 | # trial-and-error method which refines guesses 74 | # to arbitrary number of decimal places 75 | defp ppf_tande(x, pcdf, precision) do 76 | ppf_tande(x, pcdf, -10, precision + 2, 0) 77 | end 78 | 79 | defp ppf_tande(_, _, g, precision, precision) do 80 | g 81 | end 82 | 83 | defp ppf_tande(x, pcdf, g, precision, p) do 84 | increment = 100 / Math.pow(10, p) 85 | guess = g + increment 86 | 87 | if x < pcdf.(guess) do 88 | ppf_tande(x, pcdf, g, precision, p + 1) 89 | else 90 | ppf_tande(x, pcdf, guess, precision, p) 91 | end 92 | end 93 | 94 | @doc """ 95 | Draw a random number from a t distribution with specified degrees of freedom 96 | """ 97 | @spec rand(number) :: number 98 | def rand(df), do: randf(pdf(df)) 99 | 100 | defp randf(rpdf) do 101 | # t-dist is fatter-tailed than normal 102 | x = Math.rand() * 50 - 25 103 | 104 | if rpdf.(x) > Math.rand() do 105 | x 106 | else 107 | # keep trying 108 | randf(rpdf) 109 | end 110 | end 111 | end 112 | -------------------------------------------------------------------------------- /lib/statistics/tests/t_test.ex: -------------------------------------------------------------------------------- 1 | 
defmodule Statistics.Tests.TTest do 2 | import Statistics 3 | import Statistics.Math 4 | alias Statistics.Distributions.T 5 | 6 | @moduledoc """ 7 | Student's t test 8 | 9 | """ 10 | 11 | @doc """ 12 | A two-sided test for the null hypothesis that the 13 | expected value (mean) of a sample of independent 14 | observations a is equal to the given population mean, `popmean`. 15 | 16 | Returns the _t_ statistic, and the _p_ value. 17 | 18 | ## Example 19 | 20 | iex> Statistics.Tests.TTest.one_sample([1,2,3,2,1], 3) 21 | %{p: 0.023206570788795993, t: -3.585685828003181} 22 | 23 | """ 24 | def one_sample(list, popmean) do 25 | df = length(list) - 1 26 | t = (mean(list) - popmean) / (stdev(list) / sqrt(length(list))) 27 | p = get_t_prob(t, df) 28 | %{t: t, p: p} 29 | end 30 | 31 | @doc """ 32 | A two-sided test for the null hypothesis that the 33 | mean of `list1` is different to the mean of `list2`. 34 | 35 | The variance of the lists should be equal but the 36 | sample size of each last can be different. 37 | 38 | Returns the _t_ statistic, and the _p_ value. 
39 | 40 | ## Example 41 | 42 | iex> Statistics.Tests.TTest.ind_samples([1,2,3,2,1], [3,2,4,3,5]) 43 | %{p: 0.022802155958137702, t: -2.82842712474619} 44 | 45 | iex> Statistics.Tests.TTest.ind_samples([1,2,3,2,1], [3,2,4,3,5,4,5,6]) 46 | %{p: 0.0044530673387188, t: -3.5858542135407596} 47 | 48 | """ 49 | def ind_samples(list1, list2) do 50 | df = length(list1) + length(list2) - 2 51 | mu1 = mean(list1) 52 | mu2 = mean(list2) 53 | # calculate pooled standard deviation and 54 | # sample proportion differently when 55 | # sample sizes are unequal 56 | {sp, sz} = 57 | case length(list1) == length(list2) do 58 | true -> 59 | spt = sqrt((variance(list1) + variance(list2)) / 2) 60 | szt = sqrt(2 / length(list1)) 61 | {spt, szt} 62 | 63 | false -> 64 | # weight variances by sample size 65 | adj_var1 = (length(list1) - 1) * variance(list1) 66 | adj_var2 = (length(list2) - 1) * variance(list2) 67 | spf = sqrt((adj_var1 + adj_var2) / df) 68 | szf = sqrt(1 / length(list1) + 1 / length(list2)) 69 | {spf, szf} 70 | end 71 | 72 | t = (mu1 - mu2) / (sp * sz) 73 | p = get_t_prob(t, df) 74 | %{t: t, p: p} 75 | end 76 | 77 | defp get_t_prob(t, df) do 78 | c = T.cdf(df).(t) 79 | 80 | p = 81 | case t < 0.0 do 82 | true -> c 83 | false -> 1 - c 84 | end 85 | 86 | # two-sided test 87 | case p < 0.5 do 88 | true -> 2 * p 89 | false -> 1.0 90 | end 91 | end 92 | end 93 | -------------------------------------------------------------------------------- /mix.exs: -------------------------------------------------------------------------------- 1 | defmodule Statistics.Mixfile do 2 | use Mix.Project 3 | 4 | @version "0.6.3" 5 | 6 | def project do 7 | [ 8 | app: :statistics, 9 | version: @version, 10 | elixir: ">= 1.11.4", 11 | description: description(), 12 | package: package(), 13 | deps: deps() 14 | ] 15 | end 16 | 17 | def application do 18 | [] 19 | end 20 | 21 | defp deps do 22 | [ 23 | {:ex_doc, "~> 0.31", only: :dev, runtime: false} 24 | ] 25 | end 26 | 27 | defp description do 28 | """ 
29 | Functions for descriptive statistics and common distributions 30 | """ 31 | end 32 | 33 | defp package do 34 | [ 35 | files: ["lib", "mix.exs", "README*", "LICENSE*"], 36 | maintainers: ["Max Sharples", "Kash Nouroozi"], 37 | licenses: ["Apache-2.0"], 38 | links: %{"GitHub" => "https://github.com/msharp/elixir-statistics"} 39 | ] 40 | end 41 | end 42 | -------------------------------------------------------------------------------- /package.exs: -------------------------------------------------------------------------------- 1 | Expm.Package.new( 2 | name: "statistics", 3 | description: "General statistical functions", 4 | keywords: ["statistics","stats"], 5 | version: File.read!("VERSION") |> String.strip, 6 | licenses: [[name: "MIT"]], 7 | maintainers: [[name: "Max Sharples", email: "maxsharples@gmail.com"]], 8 | repositories: [[github: "msharp/elixir-statistics"]] 9 | ) 10 | -------------------------------------------------------------------------------- /test/beta_distribution_test.exs: -------------------------------------------------------------------------------- 1 | defmodule BetaDistributionTest do 2 | use ExUnit.Case, async: true 3 | doctest Statistics.Distributions.Beta, except: [rand: 2] 4 | 5 | alias Statistics.Distributions.Beta 6 | 7 | test "output of the pdf function" do 8 | assert Beta.pdf(1, 3).(0.6) == 0.48000000000000054 9 | assert Beta.pdf(2, 5).(0.2) == 2.457600000000004 10 | assert Beta.pdf(2, 2).(0.8) == 0.9600000000000015 11 | end 12 | 13 | test "return a cdf " do 14 | assert Beta.cdf(1, 1).(0.5) == 0.4999833333333332 15 | assert Beta.cdf(2, 10).(0.1) == 0.30264311979999975 16 | assert Beta.cdf(2, 5).(0.2) == 0.34464000000000033 17 | end 18 | 19 | test "return a random number from the distribution" do 20 | assert is_float(Beta.rand(1, 2)) 21 | end 22 | 23 | test "get the percentile point value" do 24 | assert Beta.ppf(1, 2).(0.1) == 0.05131850509960005 25 | # the PPF is expensive - don't run all tests every time 26 | # assert 
Beta.ppf(2, 5).(0.5) == 0.26444998329559966 27 | # assert Beta.ppf(2, 10).(0.9) == 0.3102434478125001 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /test/binomial_distribution_test.exs: -------------------------------------------------------------------------------- 1 | defmodule BinomialDistributionTest do 2 | use ExUnit.Case, async: true 3 | doctest Statistics.Distributions.Binomial, except: [rand: 2] 4 | 5 | alias Statistics.Distributions.Binomial, as: Binom 6 | alias Statistics.Math 7 | 8 | test "output of the pmf function" do 9 | assert Binom.pmf(1, 0.5).(0) == 0.5 10 | assert Binom.pmf(1, 0.5).(1) == 0.5 11 | assert Binom.pmf(4, 0.5).(4) == 0.0625 12 | assert Binom.pmf(4, 0.5).(2) == 0.375 13 | assert Binom.pmf(4, 0.5).(0.9) == nil 14 | assert Binom.pmf(100, 0.2).(20) == 0.09930021480882524 15 | assert Binom.pmf(5000, 0.0001).(1) == 0.30328807662005114 16 | end 17 | 18 | test "return a cdf " do 19 | assert Binom.cdf(4, 0.5).(2) == 0.6875 20 | assert Binom.cdf(100, 0.2).(20) == 0.5594615848734007 21 | end 22 | 23 | test "get the percentile point value" do 24 | assert Binom.ppf(10, 0.5).(0.5) == 5.0 25 | assert Binom.ppf(50, 0.2).(0.6) == 11.0 26 | end 27 | 28 | test "return a random number from binomial distribution" do 29 | n = 100 30 | r = Binom.rand(n, 0.5) 31 | # will return a float 32 | assert is_float(r) 33 | # but it should be an integer 34 | assert r == Math.to_int(r) 35 | # cannot be greater than number of trials 36 | assert r <= n 37 | # cannot be less than zero 38 | assert r >= 0 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /test/chisq_distribution_test.exs: -------------------------------------------------------------------------------- 1 | defmodule ChisqDistributionTest do 2 | use ExUnit.Case, async: true 3 | doctest Statistics.Distributions.Chisq, except: [rand: 1] 4 | 5 | alias Statistics.Distributions.Chisq, as: Chisq 6 | 7 | 
  # The assertions in these test modules compare floats with `==`, so the
  # expected values pin the exact numeric output of the implementation.
  test "output of the pdf function" do
    assert Chisq.pdf(2).(5) == 0.0410424993119494
    assert Chisq.pdf(22).(12) == 0.020651546706168852
  end

  test "return a cdf " do
    assert Chisq.cdf(1).(1) == 0.6826894921370861
    assert Chisq.cdf(2).(2) == 0.6321205588285578
    assert Chisq.cdf(23).(16.8) == 0.18105083862291943
    assert Chisq.cdf(77).(89.999) == 0.8524000316322364
  end

  test "return a random number from the distribution" do
    assert is_float(Chisq.rand(2))
    # disabled distribution check: the mean of many draws with df = 1
    # rands = for _ <- 0..10000, do: Chisq.rand(1)
    # assert Statistics.mean(rands) == 1
  end

  test "get the percentile point value" do
    assert Chisq.ppf(77).(0.95) == 98.48438345933911
    assert Chisq.ppf(7).(0.05) == 2.167349909298
  end
end

--------------------------------------------------------------------------------
/test/descriptive_test.exs:
--------------------------------------------------------------------------------
defmodule DescriptiveTest do
  use ExUnit.Case, async: true
  doctest Statistics

  # the empty list: most descriptive functions are expected to return nil for it
  @null []

  # shared fixture lists
  @a Enum.to_list(1..9)
  @b [4, 3, 3, 4, 5, 6, 7, 6, 5]
  @c Enum.to_list(1..15)
  @d @a ++ [8, 7, 6, 5, 4, 3]
  @e [1, 2, 3, 2, 1]
  @f Enum.to_list(1..6)
  @g [1]

  # paired observations (same length) for correlation and covariance
  @x [1, 2, 3, 4, 12, 4, 2, 4, 6, 3, 5, 6, 7, 4, 7, 8, 2, 5]
  @y [1, 3, 5, 6, 5, 2, 7, 4, 6, 8, 2, 3, 9, 5, 2, 8, 9, 4]

  test "sum a list" do
    assert Statistics.sum(@a) == 45
  end

  test "calculate mean" do
    assert Statistics.mean(@null) == nil
    assert Statistics.mean([1]) == 1
    assert Statistics.mean(@a) == 5
  end

  test "get mode" do
    assert Statistics.mode(@null) == nil
    assert Statistics.mode(@a ++ [2, 2]) == 2
  end

  test "calculate median" do
    assert Statistics.median(@null) == nil
    assert Statistics.median(@a) == 5
    assert Statistics.median(@a -- [9]) == 4.5
  end

  test "get maximum" do
    assert Statistics.max(@null) == nil
    assert Statistics.max(@a ++ [99]) == 99
  end

  test "get minimum" do
    assert Statistics.min(@null) == nil
    assert Statistics.min([23, 45, 34, 53, 44, 65, 99, 1, 74, 32, 69]) == 1
  end

  test "get first quartile point" do
    assert Statistics.quartile(@a ++ [5], :first) == 3
    assert Statistics.quartile(@a, :first) == 3
  end

  test "get third quartile point" do
    assert Statistics.quartile(@a ++ [5], :third) == 7
    assert Statistics.quartile(@a, :third) == 7
  end

  test "get nth percentile score" do
    assert Statistics.percentile(@null, 12) == nil

    assert Statistics.percentile(@a, 0) == 1
    assert Statistics.percentile(@a, 20) == 2.6
    assert Statistics.percentile(@a, 80) == 7.4
    assert Statistics.percentile(@a, 100) == 9
    assert Statistics.percentile(@g, 50) == 1
  end

  test "get range" do
    assert Statistics.range(@null) == nil
    assert Statistics.range(@a) == 8
  end

  test "get inter-quartile range" do
    assert Statistics.iqr(@null) == nil
    assert Statistics.iqr(@a) == 4
  end

  test "calculate variance" do
    assert Statistics.variance(@null) == nil
    assert Statistics.variance(@b) == 1.7283950617283952
  end

  test "calculate standard deviation" do
    assert Statistics.stdev(@null) == nil
    assert Statistics.stdev(@b) == 1.314684396244359
  end

  test "calculate trimmed mean" do
    assert Statistics.trimmed_mean(@null, {1, 4}) == nil
    assert Statistics.trimmed_mean(@c, {4, 9}) == 6.5
    assert Statistics.trimmed_mean((@c ++ [5, 6, 7, 8]) -- [9], :iqr) == 7.3
  end

  test "calculate harmonic mean" do
    assert Statistics.harmonic_mean(@null) == nil
    assert Statistics.harmonic_mean(@c) == 4.5204836768674568
  end

  test "calculate geometric mean" do
    assert Statistics.geometric_mean(@null) == nil
    assert Statistics.geometric_mean(@f) == 2.9937951655239088
  end

  # moment/skew/kurtosis numbers match python/scipy

  test "calculate moment about the mean" do
    assert Statistics.moment(@null, 3) == nil

    assert Statistics.moment(@d, 1) == 0.0
    assert Statistics.moment(@d, 2) == 5.2266666666666675
    assert Statistics.moment(@d, 3) == -1.3440000000000025
  end

  test "calculate skewness" do
    assert Statistics.skew(@null) == nil
    assert Statistics.skew(@e) == 0.3436215967445454
  end

  test "calculate kurtosis (fisher)" do
    assert Statistics.kurtosis(@null) == nil
    assert Statistics.kurtosis(@e) == -1.1530612244897964
  end

  test "calculate standard score for items in a list" do
    expected = [
      -0.7427813527082074,
      -1.5784103745049407,
      -0.7427813527082074,
      0.09284766908852597,
      0.9284766908852594,
      1.7641057126819928,
      0.9284766908852594,
      0.09284766908852597,
      -0.7427813527082074
    ]

    assert Statistics.zscore([3, 2, 3, 4, 5, 6, 5, 4, 3]) == expected
  end

  test "calculate the correlation of 2 lists" do
    assert Statistics.correlation(@x, @y) == 0.09315273948675289
    # lists of different lengths are expected to match no function clause
    assert_raise FunctionClauseError, fn -> Statistics.correlation(@x, @null) end
  end

  test "calculate the covariance of 2 lists" do
    assert Statistics.covariance(@x, @y) == 0.6307189542483661
    # lists of different lengths are expected to match no function clause
    assert_raise FunctionClauseError, fn -> Statistics.covariance(@x, @null) end
  end
end

--------------------------------------------------------------------------------
/test/exponential_distribution_test.exs:
--------------------------------------------------------------------------------
defmodule ExponentialDistributionTest do
  use ExUnit.Case, async: true
  doctest Statistics.Distributions.Exponential, except: [rand: 1, rand: 0]

  alias
Statistics.Distributions.Exponential 6 | 7 | test "output of the pdf function" do 8 | assert Exponential.pdf().(-1) == 0 9 | assert Exponential.pdf(0).(1) == :nan 10 | assert Exponential.pdf(-1).(1) == :nan 11 | assert Exponential.pdf().(1) == 0.36787944117144233 12 | assert Exponential.pdf(3).(2) == 0.0074362565299990755 13 | assert Exponential.pdf(2).(9) == 3.0459959489425258e-08 14 | end 15 | 16 | test "return a cdf " do 17 | assert Exponential.cdf().(-1) == 0 18 | assert Exponential.cdf(0).(1) == :nan 19 | assert Exponential.cdf(-1).(1) == :nan 20 | assert Exponential.cdf().(1) == 0.63212055882855767 21 | assert Exponential.cdf(3).(2) == 0.9975212478233336 22 | assert Exponential.cdf(2).(9) == 0.99999998477002028 23 | end 24 | 25 | test "return a random number from the distribution" do 26 | assert is_float(Exponential.rand(2)) 27 | end 28 | 29 | test "get the percentile point value" do 30 | assert Exponential.ppf().(-1) == :nan 31 | assert Exponential.ppf(1).(1.2) == :nan 32 | assert Exponential.ppf().(0) == 0 33 | assert Exponential.ppf(1).(1) == :inf 34 | assert Exponential.ppf(1).(0.5) == 0.6931471805599453 35 | assert Exponential.ppf(4).(0.9) == 0.57564627324851148 36 | end 37 | 38 | test "generating many random variates gives roughly the expected mean" do 39 | n = 100_000 40 | lambda = 0.002 41 | expected_mean = 1 / lambda 42 | sample_mean = Enum.sum(Enum.map(1..n, fn _ -> Exponential.rand(lambda) end)) / n 43 | 44 | assert 0.95 * expected_mean <= sample_mean 45 | assert sample_mean <= 1.05 * expected_mean 46 | end 47 | end 48 | -------------------------------------------------------------------------------- /test/f_distribution_test.exs: -------------------------------------------------------------------------------- 1 | defmodule FDistributionTest do 2 | use ExUnit.Case, async: true 3 | doctest Statistics.Distributions.F, except: [rand: 2, ppf: 2] 4 | 5 | alias Statistics.Distributions.F 6 | 7 | test "output of the pdf function" do 8 | assert F.pdf(1, 
1).(1) == 0.15915494309189537 9 | end 10 | 11 | test "return a cdf " do 12 | assert F.cdf(1, 1).(1) == 0.4971668763845647 13 | end 14 | 15 | test "return a random number from the distribution" do 16 | assert is_float(F.rand(1, 1)) 17 | end 18 | 19 | test "get the percentile point value" do 20 | assert F.ppf(1, 1).(0.05) == 0.0048621122317455395 21 | end 22 | end 23 | -------------------------------------------------------------------------------- /test/hypergeometric_distribution_test.exs: -------------------------------------------------------------------------------- 1 | defmodule HypergeometricDistributionTest do 2 | use ExUnit.Case, async: true 3 | doctest Statistics.Distributions.Hypergeometric, except: [rand: 2, rand: 3] # rand/3 is the arity called below; random output cannot be doctested 4 | 5 | alias Statistics.Distributions.Hypergeometric, as: Hyper 6 | alias Statistics.Math 7 | 8 | test "output of the pmf function" do 9 | assert Hyper.pmf(52, 5, 26).(2) == 0.3251300520208083 10 | assert Hyper.pmf(20, 10, 5).(1) == 0.13544891640866874 11 | assert Hyper.pmf(10, 10, 2).(1) == 0.0 12 | assert Hyper.pmf(10, 10, 2).(2) == 1.0 13 | end 14 | 15 | test "return a cdf " do 16 | assert Hyper.cdf(52, 5, 13).(2) == 0.9072328931572629 17 | assert Hyper.cdf(80, 50, 23).(10) == 0.02480510161897441 18 | assert Hyper.cdf(22, 9, 14).(2) == 0.0014916971573318324 19 | end 20 | 21 | test "get the percentile point value" do 22 | assert Hyper.ppf(80, 20, 50).(0.1) == 10.0 23 | assert Hyper.ppf(70, 10, 30).(0.75) == 5.0 24 | end 25 | 26 | test "return a random number from hypergeometric distribution" do 27 | pn = 100 28 | pk = 5 29 | n = 10 30 | r = Hyper.rand(pn, pk, n) 31 | # will return a float 32 | assert is_float(r) 33 | # but it should be an integer 34 | assert r == Math.to_int(r) 35 | # cannot be greater than number of trials 36 | assert r <= n 37 | # cannot be less than zero 38 | assert r >= 0 39 | end 40 | end 41 | -------------------------------------------------------------------------------- /test/math_functions_test.exs: 
-------------------------------------------------------------------------------- 1 | defmodule MathFunctionsTest do 2 | use ExUnit.Case, async: true 3 | doctest Statistics.Math.Functions 4 | 5 | alias Statistics.Math.Functions 6 | alias Statistics.Math 7 | 8 | alias Statistics.Distributions.T 9 | 10 | test "gamma function" do 11 | assert Functions.gamma(22) == 5.109094217170951e19 12 | assert Functions.gamma(0.02) == 49.44221016319569 13 | end 14 | 15 | test "incomplete gamma function" do 16 | assert Functions.gammainc(2, 2) == 0.5939941502901618 17 | assert Functions.gammainc(1, 2) == 0.8646647167633872 18 | assert Functions.gammainc(1, 1) == 0.63212055882855778 19 | # not a complete solution ... some cases that do not work 20 | # assert Functions.gammainc(4,1) == 0.018988156876153808 21 | # assert Functions.gammainc(0.1,1) == 0.97587265627367215 22 | end 23 | 24 | test "beta function" do 25 | assert Functions.beta(1, 2) == 0.4999999999999996 26 | assert Functions.beta(2, 2) == 0.16666666666666638 27 | assert Functions.beta(0.05, 1) == 20.000000000000007 28 | end 29 | 30 | test "hypergeometric 2F1 function" do 31 | # This is not a correct implementation, fails in many cases. 
32 | # assert Functions.hyp2f1(0.2, 3, 0.2, 0.2) == 1.9531249999999998 33 | assert Functions.hyp2f1(1, 2, 1, 0.5) == 3.999999999999955 34 | assert Functions.hyp2f1(1, 1, 1, 0.5) == 1.9999999999999996 35 | end 36 | 37 | test "simpsons numeric integration rule" do 38 | f = fn x -> Math.pow(x, 9) end 39 | sr = Functions.simpson(f, 0, 10, 100_000) 40 | assert Math.round(sr, 1) == 1_000_000_000.0 41 | 42 | # integral of t.pdf(x, 1) at 2 and -2 43 | sr = Functions.simpson(T.pdf(1), -10000, 2, 100_000) 44 | assert sr == 0.8523845106569062 45 | sr = Functions.simpson(T.pdf(1), -10000, -2, 100_000) 46 | assert sr == 0.14755182730100083 47 | end 48 | end 49 | -------------------------------------------------------------------------------- /test/math_utils_test.exs: -------------------------------------------------------------------------------- 1 | defmodule MathTest do 2 | use ExUnit.Case, async: true 3 | doctest Statistics.Math 4 | 5 | alias Statistics.Math 6 | 7 | test "square root" do 8 | assert Math.sqrt(9) == 3 9 | assert Math.sqrt(99) == 9.9498743710662 10 | end 11 | 12 | test "raise to a power" do 13 | assert Math.pow(3, 2) == 9 14 | assert Math.pow(2, -1) == 0.5 15 | assert Math.pow(-0.5, -0.5) == -1.4142135623730951 16 | assert Math.pow(99, 3) == 970_299 17 | end 18 | 19 | test "constant e" do 20 | assert Math.e() == 2.718281828459045 21 | end 22 | 23 | test "constant pi" do 24 | assert Math.pi() == 3.141592653589793 25 | end 26 | 27 | test "natural log" do 28 | assert Math.ln(2) == 0.6931471805599453 29 | assert Math.ln(99) == 4.59511985013459 30 | end 31 | 32 | test "exponent function" do 33 | assert Math.exp(2) == 7.38905609893065 34 | end 35 | 36 | test "round a decimal" do 37 | assert Math.round(99.999999, 3) == 100 38 | assert Math.round(0.123456, 4) == 0.1235 39 | assert Math.round(0.123436, 4) == 0.1234 40 | assert Math.round(1.123456, 0) == 1 41 | end 42 | 43 | test "get absolute value" do 44 | assert Math.abs(-2) == 2 45 | assert Math.abs(2.2) == 2.2 46 | 
end 47 | 48 | test "calculate factorial" do 49 | assert Math.factorial(0) == 1 50 | assert Math.factorial(1.0) == 1 51 | assert Math.factorial(5) == 120 52 | assert Math.factorial(11) == 39_916_800 53 | end 54 | 55 | test "get the floor as a float" do 56 | assert Math.floor(2) == 2.0 57 | assert Math.floor(2.9999) == 2 58 | assert Math.floor(-2.2) == -3.0 59 | end 60 | 61 | test "get the ceiling as a float" do 62 | assert Math.ceil(2) == 2.0 63 | assert Math.ceil(2.9999) == 3.0 64 | assert Math.ceil(-2.2) == -2.0 65 | end 66 | 67 | test "turn a float into an integer" do 68 | assert Math.to_int(2) == 2 69 | assert Math.to_int(2.2) == 2 70 | assert Math.to_int(599.9) == 599 71 | end 72 | 73 | test "calculate K permutations for n items" do 74 | assert Math.permutation(1, 2) == 0 75 | assert Math.permutation(1, -1) == 0 76 | assert Math.permutation(-1, 1) == 0 77 | assert Math.permutation(2, 2) == 2 78 | assert Math.permutation(20, 5) == 1_860_480 79 | end 80 | 81 | test "calculate K combinations for n items" do 82 | assert Math.combination(1, 2) == 0 83 | assert Math.combination(1, -1) == 0 84 | assert Math.combination(-1, 1) == 0 85 | assert Math.combination(2, 2) == 1 86 | assert Math.combination(20, 5) == 15504 87 | end 88 | end 89 | -------------------------------------------------------------------------------- /test/normal_distribution_test.exs: -------------------------------------------------------------------------------- 1 | defmodule NormalDistributionTest do 2 | use ExUnit.Case, async: true 3 | doctest Statistics.Distributions.Normal, except: [rand: 0, rand: 3] 4 | 5 | alias Statistics.Distributions.Normal, as: Norm 6 | alias Statistics.Math 7 | 8 | # to mitigate the vagaries of floating-point math and rounding errors, 9 | # compare both values rounded to `precision` decimal places (4 by default) 10 | def assert_p(left, right, precision \\ 4) do 11 | assert Math.round(left, precision) == Math.round(right, precision) 12 | end 13 | 14 | test "output of the pdf function" do 15 | assert 
Norm.pdf().(0) == 0.3989422804014327 16 | assert Norm.pdf(0.2, 1).(3) == 0.00791545158297997 17 | assert Norm.pdf().(-1) == 0.24197072451914337 18 | assert Norm.pdf(23.5, 1.7).(22.0) == 0.15900173884840293 19 | end 20 | 21 | test "return a cdf " do 22 | assert Norm.cdf().(2) == 0.9772499371127437 23 | assert_p(Norm.cdf().(0), 0.5) 24 | assert Norm.cdf(2, 2.5).(2.8) == 0.6255157658802836 25 | assert_p(Norm.cdf(2, 2.5).(2), 0.5) 26 | end 27 | 28 | test "return a normally-distributed random number" do 29 | assert is_float(Norm.rand()) 30 | rands = for _ <- 0..10000, do: Norm.rand(5, 1.5) 31 | assert_p(Statistics.mean(rands), 5, 1) 32 | assert_p(Statistics.stdev(rands), 1.5, 1) 33 | end 34 | 35 | test "get the percentile point value" do 36 | assert Norm.ppf().(0.975) == 1.9603949169253396 37 | assert Norm.ppf().(0.025) == -1.96039491692534 38 | assert Norm.ppf().(0.75) == 0.6741891400433162 39 | assert Norm.ppf(7, 2.1).(0.25) == 5.584202805909036 40 | assert Norm.ppf(37.66, 1.31).(0.95) == 39.81522698658839 41 | end 42 | end 43 | -------------------------------------------------------------------------------- /test/poisson_distribution_test.exs: -------------------------------------------------------------------------------- 1 | defmodule PoissonDistributionTest do 2 | use ExUnit.Case, async: true 3 | doctest Statistics.Distributions.Poisson, except: [rand: 1] 4 | 5 | alias Statistics.Distributions.Poisson 6 | alias Statistics.Math 7 | 8 | test "output of the pmf function" do 9 | assert Poisson.pmf(1).(1) == 0.36787944117144233 10 | assert Poisson.pmf(10).(10) == 0.1251100357211333 11 | assert Poisson.pmf(10).(4) == 0.018916637401035358 12 | end 13 | 14 | test "return a cdf " do 15 | assert Poisson.cdf(1).(1) == 0.73575888234288467 16 | assert Poisson.cdf(10).(5) == 0.06708596287903179 17 | end 18 | 19 | test "get the percentile point value" do 20 | assert Poisson.ppf(1).(0.95) == 3.0 21 | assert Poisson.ppf(10).(0.05) == 5.0 22 | assert Poisson.ppf(32).(0.75) == 36.0 
23 | assert Poisson.ppf(62).(0.05) == 49.0 24 | end 25 | 26 | test "random number generation" do 27 | r = Poisson.rand(10) 28 | assert Math.floor(r) == r 29 | end 30 | end 31 | -------------------------------------------------------------------------------- /test/t_distribution_test.exs: -------------------------------------------------------------------------------- 1 | defmodule TDistributionTest do 2 | use ExUnit.Case, async: true 3 | doctest Statistics.Distributions.T, except: [rand: 0, rand: 1, rand: 3] # rand/1 is the arity called below; random output cannot be doctested 4 | 5 | alias Statistics.Distributions.T 6 | 7 | test "output of the pdf function" do 8 | assert T.pdf(3).(0) == 0.3675525969478612 9 | assert T.pdf(7).(0.1) == 0.38279933426055135 10 | assert T.pdf(77).(0.1) == 0.39564030492250557 11 | end 12 | 13 | test "return a cdf " do 14 | assert T.cdf(3).(2) == 0.9115868129912105 15 | # assert T.cdf(0, 1) == 0.48740751602180743 # ~ 0.5 16 | end 17 | 18 | test "return a random number from the distribution" do 19 | assert is_float(T.rand(2)) 20 | # rands = for _ <- 0..10000, do: T.rand(3) 21 | # assert T.rand(77) == 0.5 22 | end 23 | 24 | @tag timeout: 120_000 25 | test "get the percentile point value" do 26 | assert T.ppf(1).(0.1) == -3.0799999999999996 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /test/t_test_test.exs: -------------------------------------------------------------------------------- 1 | defmodule TTestTest do 2 | use ExUnit.Case, async: true 3 | doctest Statistics.Tests.TTest 4 | 5 | alias Statistics.Tests.TTest 6 | 7 | test "one sample t-test" do 8 | assert TTest.one_sample([1, 2, 1, 2, 1, 2], 1.5) == %{p: 1.0, t: 0.0} 9 | 10 | assert TTest.one_sample([4, 1, 2, 3, 1, 2, 3, 4], 2) == %{ 11 | p: 0.23032680249555892, 12 | t: 1.2649110640673518 13 | } 14 | end 15 | 16 | test "independent samples t-test" do 17 | assert TTest.ind_samples([1, 2, 1, 2, 1, 2], [2, 1, 2, 1, 2, 1]) == %{t: 0.0, p: 1.0} 18 | 19 | assert TTest.ind_samples([1, 2, 1, 2, 1, 2], [5, 6, 7, 
6, 7, 5]) == %{ 20 | p: 4.3244470028110745e-7, 21 | t: -11.512838683202965 22 | } 23 | end 24 | end 25 | -------------------------------------------------------------------------------- /test/test_helper.exs: -------------------------------------------------------------------------------- 1 | ExUnit.start() 2 | --------------------------------------------------------------------------------