├── .github └── workflows │ └── rust.yml ├── .gitignore ├── Cargo.toml ├── FEATURES.md ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benches ├── benchmark_distributions.rs ├── benchmark_functions.rs ├── benchmark_linalg.rs ├── benchmark_statistics.rs └── benchmark_timeseries.rs ├── examples └── gp_regression │ ├── Cargo.toml │ ├── gp.png │ └── src │ └── main.rs └── src ├── distributions ├── bernoulli.rs ├── beta.rs ├── binomial.rs ├── chi_squared.rs ├── discreteuniform.rs ├── exponential.rs ├── gamma.rs ├── gumbel.rs ├── mod.rs ├── multivariatenormal.rs ├── normal.rs ├── pareto.rs ├── poisson.rs ├── t.rs └── uniform.rs ├── functions ├── combinatorial.rs ├── gamma.rs ├── interpolate.rs ├── mod.rs └── statistical.rs ├── integrate ├── functions.rs ├── mod.rs ├── odes.rs └── samples.rs ├── lib.rs ├── linalg ├── array │ ├── broadcast.rs │ ├── dot.rs │ ├── matrix.rs │ ├── mod.rs │ ├── vec.rs │ └── vops.rs ├── decomposition │ ├── cholesky.rs │ ├── lu.rs │ ├── mod.rs │ └── substitution.rs ├── mod.rs ├── rotations.rs └── utils.rs ├── optimize ├── adam.rs ├── lbfgs.rs ├── lm.rs ├── mod.rs ├── optimizers │ └── mod.rs └── sgd.rs ├── predict ├── glms │ ├── families.rs │ ├── formula.rs │ ├── glm.rs │ └── mod.rs ├── gps │ ├── kernels.rs │ └── mod.rs ├── mod.rs └── polynomial.rs ├── prelude.rs ├── statistics ├── covariance.rs ├── hist.rs ├── mod.rs ├── moments.rs ├── order.rs └── tests.rs ├── timeseries ├── autoregressive.rs ├── functions.rs └── mod.rs └── validation ├── mod.rs └── resample.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Build 20 | run: cargo build --verbose 21 | - name: Run tests 22 | run: cargo test --verbose 23 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | src/main.rs 4 | .travis.yml 5 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "compute" 3 | version = "0.2.4" 4 | authors = ["Jeff Shen "] 5 | edition = "2018" 6 | license = "MIT OR Apache-2.0" 7 | 8 | readme = "README.md" 9 | description = "A crate for statistical computing." 10 | repository = "https://github.com/al-jshen/compute" 11 | 12 | categories = ["science"] 13 | keywords = ["statistics", "compute", "data"] 14 | 15 | [dependencies] 16 | alea = "0.2" 17 | approx_eq = "0.1" 18 | reverse = "0.2" 19 | impl_ops = "0.1" 20 | serde = { version = "1.0.130", features = ["derive"] } 21 | serde_derive = "1.0.130" 22 | rayon = "1.5.1" 23 | 24 | [features] 25 | blas = ["blas_crate", "accelerate-src", "openblas-src"] 26 | lapack = ["lapack_crate", "accelerate-src", "openblas-src"] 27 | 28 | [dependencies.blas_crate] 29 | package = "blas" 30 | version = "0.22" 31 | optional = true 32 | default-features = false 33 | 34 | [dependencies.lapack_crate] 35 | package = "lapack" 36 | version = "0.19" 37 | optional = true 38 | default-features = false 39 | 40 | [target.'cfg(all(target_os = "macos"))'.dependencies] 41 | accelerate-src = {version = "0.3", optional = true} 42 | 43 | [target.'cfg(not(target_os = "macos"))'.dependencies] 44 | openblas-src = {version = "0.10", optional = true} 45 | 46 | [dev-dependencies] 47 | criterion = "0.3" 48 | 49 | [[bench]] 50 | name = "benchmark_statistics" 51 | harness = false 52 | 53 | [[bench]] 54 | name = "benchmark_linalg" 55 | harness = false 56 | 57 | [[bench]] 58 | name = "benchmark_distributions" 59 | harness = false 60 | 61 | [[bench]] 62 | name = "benchmark_functions" 63 | harness = 
false 64 | 65 | [[bench]] 66 | name = "benchmark_timeseries" 67 | harness = false 68 | -------------------------------------------------------------------------------- /FEATURES.md: -------------------------------------------------------------------------------- 1 | ## Existing features 2 | 3 | - regression methods 4 | - polynomial 5 | - GLMs: logistic, (quasi-)Poisson, Gamma, exponential 6 | - optimization methods 7 | - numerical differentiation, partial derivatives, automatic differentiation (currently with autodiff crate) 8 | - optimizers 9 | - Adam, Levenberg-Marquardt, SGD with (Nesterov) momentum 10 | - numerical integration of functions 11 | - trapezoid, Romberg, 5-point Gauss-Legendre quadrature 12 | - basic statistical distributions 13 | - continuous 14 | - (Multivariate) Normal, Beta, Gamma, Chi Squared, Student's T, Uniform, Exponential, Pareto, Gumbel 15 | - discrete 16 | - Bernoulli, Binomial, Poisson, Discrete Uniform 17 | - sampling, PDFs/PMFs 18 | - analytic means and variances 19 | - mathematical and statistical functions 20 | - gamma, digamma, beta 21 | - logistic, logit, (general) boxcox transform, softmax 22 | - binomial coefficients 23 | - linear interpolation (with extrapolation) 24 | - statistical methods 25 | - (sample) covariance, mean, variance, min, max 26 | - time series models 27 | - autoregressive models 28 | - related functions 29 | - autocorrelation, autocovariance, differencing 30 | - validation methods 31 | - resampling 32 | - bootstrap, jackknife 33 | - linear algebra: both BLAS/LAPACK and Rust implementations 34 | - vector and matrix structs 35 | - overloaded arithmetic operations for combinations of {matrix, vector, scalar} with automatic broadcasting a la numpy 36 | - general utilities 37 | - dot product, (blocked) matrix multiplication, matrix inversion, Toeplitz matrix, Vandermonde matrix, (infinity) norm, linear solve, transpose, design matrix 38 | - vector-vector, scalar-vector, vector-scalar operations with loop unrolling 
39 | - decompositions and solvers 40 | - LU, Cholesky 41 | 42 | ## Planned features 43 | 44 | - distributions: CDFs, fitting to data 45 | - more time series models (SARIMA, exponential smoothing models, trend decomposition) 46 | - non-linear optimizers (BFGS) 47 | - ODE integrators (leapfrog, RK4) 48 | - clustering algorithms (k-means/EM, DBSCAN) 49 | - more regression models (mixed models, GP, penalized models, splines) 50 | - prediction trees (CART, random forests, gradient boosted trees) 51 | - order statistics (quantiles) 52 | - statistical tests (t-test, ANOVA, Kolmogorov-Smirnov, Anderson-Darling) 53 | - data preprocessing (outlier detection, standardization, dimensionality reduction (PCA)) 54 | - more linear algebra decompositions (QR, SVD) 55 | - samplers? rejection, RWM, HMC, NUTS, (dynamic) nested sampling 56 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Jeff Shen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # compute 2 | 3 | 4 | 5 | [![Crates.io](https://img.shields.io/crates/v/compute.svg?style=for-the-badge&color=fc8d62&logo=rust)](https://crates.io/crates/compute) 6 | [![Documentation](https://img.shields.io/badge/docs.rs-compute-5E81AC?style=for-the-badge&labelColor=555555&logoColor=white)](https://docs.rs/compute) 7 | ![License](https://img.shields.io/crates/l/compute?label=License&style=for-the-badge&color=62a69b) 8 | 9 | A crate for scientific and statistical computing. For a list of what this crate provides, see [`FEATURES.md`](FEATURES.md). For more detailed explanations, see the [documentation](https://docs.rs/compute). 10 | 11 | To use the latest stable version in your Rust program, add the following to your `Cargo.toml` file: 12 | 13 | ```rust 14 | // Cargo.toml 15 | [dependencies] 16 | compute = "0.2" 17 | ``` 18 | 19 | For the latest version, add the following to your `Cargo.toml` file: 20 | 21 | ```rust 22 | [dependencies] 23 | compute = { git = "https://github.com/al-jshen/compute" } 24 | ``` 25 | 26 | There are many functions which rely on linear algebra methods. 
You can either use the provided Rust methods (default), or use BLAS and/or LAPACK by activating the `"blas"` and/or the `"lapack"` feature flags in `Cargo.toml`: 27 | 28 | ```rust 29 | // example with BLAS only 30 | compute = {version = "0.2", features = ["blas"]} 31 | ``` 32 | 33 | ## Examples 34 | 35 | ### Statistical distributions 36 | 37 | ```rust 38 | use compute::distributions::*; 39 | 40 | let beta = Beta::new(2., 2.); 41 | let betadata = b.sample_n(1000); // vector of 1000 variates 42 | 43 | println!("{}", beta.mean()); // analytic mean 44 | println!("{}", beta.var()); // analytic variance 45 | println!("{}", beta.pdf(0.5)); // probability distribution function 46 | 47 | let binom = Binomial::new(4, 0.5); 48 | 49 | println!("{}", p.sample()); // sample single value 50 | println!("{}", p.pmf(2)); // probability mass function 51 | ``` 52 | 53 | ### Linear algebra 54 | 55 | ```rust 56 | use compute::linalg::*; 57 | 58 | let x = arange(1., 4., 0.1).ln_1p().reshape(-1, 3); // automatic shape detection 59 | let y = Vector::from([1., 2., 3.]); // vector struct 60 | let pd = x.t().dot(x); // transpose and matrix multiply 61 | let jitter = Matrix::eye(3) * 1e-6; // elementwise operations 62 | let c = (pd + jitter).cholesky(); // matrix decompositions 63 | let s = c.solve(&y.exp()); // linear solvers 64 | println!("{}", s); 65 | ``` 66 | 67 | ### Linear models 68 | 69 | ```rust 70 | use compute::prelude::*; 71 | 72 | let x = vec![ 73 | 0.50, 0.75, 1.00, 1.25, 1.50, 1.75, 1.75, 2.00, 2.25, 2.50, 2.75, 3.00, 3.25, 3.50, 4.00, 74 | 4.25, 4.50, 4.75, 5.00, 5.50, 75 | ]; 76 | let y = vec![ 77 | 0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 1., 1., 1., 1., 1., 78 | ]; 79 | let n = y.len(); 80 | let xd = design(&x, n); 81 | 82 | let mut glm = GLM::new(ExponentialFamily::Bernoulli); // logistic regression 83 | glm.set_penalty(1.); // L2 penalty 84 | glm.fit(&xd, &y, 25).unwrap(); // with fit scoring algorithm (MLE) 85 | let coef = glm.coef().unwrap(); // get 
estimated parameters 86 | let errors = glm.coef_standard_error().unwrap(); // get errors on parameters 87 | 88 | println!("{:?}", coef); 89 | println!("{:?}", errors); 90 | 91 | ``` 92 | 93 | ### Optimization 94 | 95 | ```rust 96 | use compute::optimize::*; 97 | 98 | // define a function using a consistent optimization interface 99 | fn rosenbrock<'a>(p: &[Var<'a>], d: &[&[f64]]) -> Var<'a> { 100 | assert_eq!(p.len(), 2); 101 | assert_eq!(d.len(), 1); 102 | assert_eq!(d[0].len(), 2); 103 | 104 | let (x, y) = (p[0], p[1]); 105 | let (a, b) = (d[0][0], d[0][1]); 106 | 107 | (a - x).powi(2) + b * (y - x.powi(2)).powi(2) 108 | } 109 | 110 | // set up and run optimizer 111 | let init = [0., 0.]; 112 | let optim = Adam::with_stepsize(5e-4); 113 | let popt = optim.optimize(rosenbrock, &init, &[&[1., 100.]], 10000); 114 | 115 | println!("{:?}", popt); 116 | ``` 117 | 118 | ### Time series models 119 | 120 | ```rust 121 | use compute::timeseries::*; 122 | 123 | let x = vec![-2.584, -3.474, -1.977, -0.226, 1.166, 0.923, -1.075, 0.732, 0.959]; 124 | 125 | let mut ar = AR::new(1); // AR(1) model 126 | ar.fit(&x); // fit model with Yule-Walker equations 127 | println!("{:?}", ar.coeffs); // get model coefficients 128 | println!("{:?}", ar.predict(&x, 5)); // forecast 5 steps ahead 129 | ``` 130 | 131 | ### Numerical integration 132 | 133 | ```rust 134 | use compute::integrate::*; 135 | 136 | let f = |x: f64| x.sqrt() + x.sin() - (3. 
* x).cos() - x.powi(2); 137 | println!("{}", trapz(f, 0., 1., 100)); // trapezoid integration with 100 segments 138 | println!("{}", quad5(f, 0., 1.)); // gaussian quadrature integration 139 | println!("{}", romberg(f, 0., 1., 1e-8, 10)); // romberg integration with tolerance and max steps 140 | ``` 141 | 142 | ### Data summary functions 143 | 144 | ```rust 145 | use compute::statistics::*; 146 | use compute::linalg::Vector; 147 | 148 | let x = Vector::from([2.2, 3.4, 5., 10., -2.1, 0.1]); 149 | 150 | println!("{}", x.mean()); 151 | println!("{}", x.var()); 152 | println!("{}", x.max()); 153 | println!("{}", x.argmax()); 154 | ``` 155 | 156 | ### Mathematical and statistical functions 157 | 158 | ```rust 159 | use compute::functions::*; 160 | 161 | println!("{}", logistic(4.)); 162 | println!("{}", boxcox(5., 2.); // boxcox transform 163 | println!("{}", digamma(2.)); 164 | println!("{}", binom_coeff(10, 4)); // n choose k 165 | ``` 166 | -------------------------------------------------------------------------------- /benches/benchmark_distributions.rs: -------------------------------------------------------------------------------- 1 | use compute::distributions::*; 2 | use criterion::{criterion_group, criterion_main, Criterion}; 3 | 4 | pub fn generate(c: &mut Criterion) { 5 | c.bench_function("generate 1e6 standard normals", |b| { 6 | b.iter(|| Normal::new(0., 1.).sample_n(1e6 as usize)) 7 | }); 8 | c.bench_function("generate 1e6 gammas", |b| { 9 | b.iter(|| Gamma::new(2., 4.).sample_n(1e6 as usize)) 10 | }); 11 | c.bench_function("generate 1e6 exponentials", |b| { 12 | b.iter(|| Exponential::new(2.).sample_n(1e6 as usize)) 13 | }); 14 | } 15 | 16 | fn discrete_uniform(c: &mut Criterion) { 17 | c.bench_function("generate 1e6 discrete uniform values by rounding", |b| { 18 | b.iter(|| { 19 | Uniform::new(2.1, 200.1) 20 | .sample_n(1e6 as usize) 21 | .iter() 22 | .map(|x| f64::floor(*x) as i64) 23 | .collect::>() 24 | }) 25 | }); 26 | c.bench_function("generate 
1e6 discrete uniform values directly", |b| { 27 | b.iter(|| DiscreteUniform::new(2, 200).sample_n(1e6 as usize)) 28 | }); 29 | } 30 | 31 | fn poisson(c: &mut Criterion) { 32 | c.bench_function("generate 1e6 poissons with multiplication method", |b| { 33 | b.iter(|| Poisson::new(8.).sample_n(1e6 as usize)) 34 | }); 35 | c.bench_function("generate 1e6 poissons with PTRS algorithm", |b| { 36 | b.iter(|| Poisson::new(18.).sample_n(1e6 as usize)) 37 | }); 38 | } 39 | 40 | fn binomial(c: &mut Criterion) { 41 | c.bench_function("generate 1e3 poissons with inversion method", |b| { 42 | b.iter(|| Binomial::new(15, 0.4).sample_n(1e3 as usize)) 43 | }); 44 | c.bench_function("generate 1e3 poissons with BTPE algorithm", |b| { 45 | b.iter(|| Binomial::new(70, 0.7).sample_n(1e3 as usize)) 46 | }); 47 | } 48 | 49 | fn t(c: &mut Criterion) { 50 | c.bench_function("generate 1e6 t distributed variates", |b| { 51 | b.iter(|| T::new(2.).sample_n(1e6 as usize)) 52 | }); 53 | } 54 | 55 | criterion_group!(benches, generate, discrete_uniform, poisson, binomial, t); 56 | criterion_main!(benches); 57 | -------------------------------------------------------------------------------- /benches/benchmark_functions.rs: -------------------------------------------------------------------------------- 1 | use compute::functions::*; 2 | use compute::{ 3 | distributions::{Distribution, Normal}, 4 | prelude::DiscreteUniform, 5 | }; 6 | use criterion::{criterion_group, criterion_main, Criterion}; 7 | 8 | pub fn criterion_mean(c: &mut Criterion) { 9 | let v = Normal::new(0., 100.).sample_n(1e6 as usize); 10 | c.bench_function("logit 1e6", |b| b.iter(|| v.iter().map(|x| logistic(*x)))); 11 | c.bench_function("logit 1e6", |b| b.iter(|| v.iter().map(|x| logit(*x)))); 12 | c.bench_function("gamma 1e6", |b| b.iter(|| v.iter().map(|x| gamma(*x)))); 13 | c.bench_function("digamma 1e6", |b| b.iter(|| v.iter().map(|x| digamma(*x)))); 14 | } 15 | 16 | pub fn criterion_binomial(c: &mut Criterion) { 17 | let n: Vec 
= DiscreteUniform::new(5, 100) 18 | .sample_n(1000) 19 | .iter() 20 | .map(|x| *x as u64) 21 | .collect(); 22 | let k: Vec = n.iter().map(|x| (x / 2)).collect(); 23 | c.bench_function("binomial coeffs 1e3", |b| { 24 | b.iter(|| (0..1000).into_iter().map(|i| binom_coeff(n[i], k[i]))) 25 | }); 26 | } 27 | 28 | criterion_group!(benches, criterion_binomial); 29 | criterion_main!(benches); 30 | -------------------------------------------------------------------------------- /benches/benchmark_linalg.rs: -------------------------------------------------------------------------------- 1 | use compute::distributions::{Distribution, Normal, Uniform}; 2 | use compute::linalg::lu::*; 3 | use compute::linalg::*; 4 | use compute::prelude::Distribution1D; 5 | use criterion::{criterion_group, criterion_main, Criterion}; 6 | 7 | pub fn criterion_dot(c: &mut Criterion) { 8 | let v1 = Normal::new(0., 100.).sample_n(1e6 as usize); 9 | let v2 = Normal::new(0., 100.).sample_n(1e6 as usize); 10 | let v3 = Normal::new(0., 100.).sample_n(1e4 as usize); 11 | let v4 = Normal::new(0., 100.).sample_n(1e4 as usize); 12 | c.bench_function("dot product 1e6", |b| b.iter(|| dot(&v1, &v2))); 13 | c.bench_function("dot product 1e4", |b| b.iter(|| dot(&v3, &v4))); 14 | } 15 | 16 | pub fn criterion_norm(c: &mut Criterion) { 17 | let v1 = Normal::new(0., 100.).sample_n(1e3 as usize); 18 | c.bench_function("norm 1e3", |b| b.iter(|| norm(&v1))); 19 | } 20 | 21 | pub fn criterion_logsumexp(c: &mut Criterion) { 22 | let v1 = Normal::new(0., 1.).sample_n(1e3 as usize); 23 | c.bench_function("logsumexp 1e3", |b| b.iter(|| logsumexp(&v1))); 24 | } 25 | 26 | pub fn criterion_vops_assign(c: &mut Criterion) { 27 | let v1 = Normal::default().sample_n(1000); 28 | let v2 = Normal::default().sample_n(1000); 29 | let s = Normal::default().sample(); 30 | 31 | c.bench_function("vector add with assignment", |b| { 32 | b.iter(|| { 33 | let v = &v1 + &v2; 34 | }) 35 | }); 36 | c.bench_function("vector clone and mutating 
add", |b| { 37 | b.iter(|| { 38 | let mut v = v1.clone(); 39 | v += &v2; 40 | }) 41 | }); 42 | 43 | c.bench_function("vector subtract with assignment", |b| { 44 | b.iter(|| { 45 | let v = &v1 - &v2; 46 | }) 47 | }); 48 | c.bench_function("vector clone and mutating subtract", |b| { 49 | b.iter(|| { 50 | let mut v = v1.clone(); 51 | v -= &v2; 52 | }) 53 | }); 54 | 55 | c.bench_function("vector multiply with assignment", |b| { 56 | b.iter(|| { 57 | let v = &v1 * &v2; 58 | }) 59 | }); 60 | 61 | c.bench_function("vector clone and mutating multiply", |b| { 62 | b.iter(|| { 63 | let mut v = v1.clone(); 64 | v *= &v2; 65 | }) 66 | }); 67 | 68 | c.bench_function("vector divide with assignment", |b| { 69 | b.iter(|| { 70 | let v = &v1 / &v2; 71 | }) 72 | }); 73 | c.bench_function("vector clone and mutating divide", |b| { 74 | b.iter(|| { 75 | let mut v = v1.clone(); 76 | v /= &v2; 77 | }) 78 | }); 79 | } 80 | 81 | pub fn criterion_vops(c: &mut Criterion) { 82 | let mut v1 = Normal::default().sample_n(1000); 83 | let mut v2 = Normal::default().sample_n(1000); 84 | let s = Normal::default().sample(); 85 | // c.bench_function("vector add", |b| b.iter(|| &v1 + &v2)); 86 | // c.bench_function("normal add", |b| { 87 | // b.iter(|| v1.iter().zip(&v2).map(|(i, j)| i + j).collect::()) 88 | // }); 89 | 90 | c.bench_function("vector multiply", |b| b.iter(|| &v1 * &v2)); 91 | c.bench_function("normal multiply", |b| { 92 | b.iter(|| v1.iter().zip(&v2).map(|(i, j)| i * j).collect::()) 93 | }); 94 | 95 | c.bench_function("vector divide", |b| b.iter(|| &v1 / &v2)); 96 | c.bench_function("normal divide", |b| { 97 | b.iter(|| v1.iter().zip(&v2).map(|(i, j)| i * j).collect::()) 98 | }); 99 | 100 | // c.bench_function("vector ln", |b| b.iter(|| v1.ln())); 101 | // c.bench_function("normal ln", |b| { 102 | // b.iter(|| v1.iter().map(|i| i.ln()).collect::()) 103 | // }); 104 | 105 | // c.bench_function("vector sqrt", |b| b.iter(|| v1.sqrt())); 106 | // c.bench_function("normal sqrt", |b| { 107 | 
// b.iter(|| v1.iter().map(|i| i.sqrt()).collect::()) 108 | // }); 109 | 110 | // c.bench_function("vector exp", |b| b.iter(|| v1.exp())); 111 | // c.bench_function("normal exp", |b| { 112 | // b.iter(|| v1.iter().map(|i| i.exp()).collect::()) 113 | // }); 114 | 115 | c.bench_function("vector square", |b| b.iter(|| v1.powi(2))); 116 | c.bench_function("normal square", |b| { 117 | b.iter(|| Vector::from(v1.iter().map(|i| i.powi(2)).collect::>())) 118 | }); 119 | 120 | c.bench_function("vector cube", |b| b.iter(|| v1.powi(3))); 121 | c.bench_function("normal cube", |b| { 122 | b.iter(|| v1.iter().map(|i| i.powi(3)).collect::()) 123 | }); 124 | 125 | c.bench_function("vector float power", |b| b.iter(|| v1.powf(1.25))); 126 | c.bench_function("normal float power", |b| { 127 | b.iter(|| v1.iter().map(|i| i.powf(1.25)).collect::()) 128 | }); 129 | 130 | // c.bench_function("unrolled vector-scalar addition", |b| b.iter(|| &v1 + s)); 131 | // c.bench_function("normal vector-scalar addition", |b| { 132 | // b.iter(|| v1.iter().map(|i| i + s).collect::()) 133 | // }); 134 | 135 | // c.bench_function("unrolled vector-scalar division", |b| b.iter(|| &v1 / s)); 136 | // c.bench_function("normal vector-scalar division", |b| { 137 | // b.iter(|| v1.iter().map(|i| i / s).collect::()) 138 | // }); 139 | 140 | // c.bench_function("unrolled scalar-vector subtraction", |b| b.iter(|| s - &v1)); 141 | // c.bench_function("normal scalar-vector subtraction", |b| { 142 | // b.iter(|| v1.iter().map(|i| s - i).collect::()) 143 | // }); 144 | } 145 | 146 | pub fn criterion_matrix_sum(c: &mut Criterion) { 147 | let v = Normal::default().sample_matrix(100, 100); 148 | c.bench_function("sum rows 100x100", |b| b.iter(|| v.sum_rows())); 149 | c.bench_function("sum cols 100x100", |b| b.iter(|| v.sum_cols())); 150 | } 151 | 152 | pub fn criterion_ludecomp(c: &mut Criterion) { 153 | let v5 = Uniform::new(2., 50.).sample_n(5 * 5); 154 | c.bench_function("5x5 lu factorization", |b| b.iter(|| lu(&v5))); 
155 | let v25 = Uniform::new(2., 50.).sample_n(25 * 25); 156 | c.bench_function("25x25 lu factorization", |b| b.iter(|| lu(&v25))); 157 | } 158 | 159 | pub fn criterion_solve(c: &mut Criterion) { 160 | let a5 = Uniform::new(2., 50.).sample_n(5 * 5); 161 | let b5 = Uniform::new(6., 30.).sample_n(5); 162 | c.bench_function("5 variable linear solve", |b| b.iter(|| solve(&a5, &b5))); 163 | let a10 = Uniform::new(2., 50.).sample_n(10 * 10); 164 | let b10 = Uniform::new(6., 30.).sample_n(10); 165 | c.bench_function("10 variable linear solve", |b| b.iter(|| solve(&a10, &b10))); 166 | let a30 = Uniform::new(2., 50.).sample_n(30 * 30); 167 | let b30 = Uniform::new(6., 30.).sample_n(30); 168 | c.bench_function("30 variable linear solve", |b| b.iter(|| solve(&a30, &b30))); 169 | } 170 | 171 | pub fn criterion_invert(c: &mut Criterion) { 172 | let a5 = Uniform::new(2., 50.).sample_n(5 * 5); 173 | let a10 = Uniform::new(2., 50.).sample_n(10 * 10); 174 | let a30 = Uniform::new(2., 50.).sample_n(30 * 30); 175 | c.bench_function("5x5 inversion", |b| b.iter(|| invert_matrix(&a5))); 176 | c.bench_function("10x10 inversion", |b| b.iter(|| invert_matrix(&a10))); 177 | c.bench_function("30x30 inversion", |b| b.iter(|| invert_matrix(&a30))); 178 | } 179 | 180 | pub fn criterion_matmul(c: &mut Criterion) { 181 | let normgen = Normal::new(2., 50.); 182 | let a1 = normgen.sample_n(5 * 15); 183 | let a2 = normgen.sample_n(15 * 10); 184 | let a3 = normgen.sample_n(512 * 512); 185 | 186 | c.bench_function("5x15x10 matmul", |b| { 187 | b.iter(|| matmul(&a1, &a2, 5, 15, false, false)) 188 | }); 189 | c.bench_function("5x15x10 matmul with transpose", |b| { 190 | b.iter(|| matmul(&a2, &a1, 15, 5, true, true)) 191 | }); 192 | c.bench_function("512x512 matmul", |b| { 193 | b.iter(|| matmul(&a3, &a3, 512, 512, false, false)) 194 | }); 195 | c.bench_function("512x512 matmul with transpose", |b| { 196 | b.iter(|| matmul(&a3, &a3, 512, 512, true, true)) 197 | }); 198 | } 199 | 200 | pub fn 
criterion_matmul_blocked(c: &mut Criterion) { 201 | let normgen = Normal::new(2., 50.); 202 | let a3 = normgen.sample_n(512 * 512); 203 | 204 | c.bench_function("512x512 blocked matmul, blocksize 25", |b| { 205 | b.iter(|| matmul_blocked(&a3, &a3, 512, 512, false, false, 25)) 206 | }); 207 | c.bench_function("512x512 blocked matmul, blocksize 100", |b| { 208 | b.iter(|| matmul_blocked(&a3, &a3, 512, 512, false, false, 100)) 209 | }); 210 | c.bench_function("512x512 blocked matmul, blocksize 250", |b| { 211 | b.iter(|| matmul_blocked(&a3, &a3, 512, 512, false, false, 250)) 212 | }); 213 | } 214 | 215 | pub fn criterion_xtx(c: &mut Criterion) { 216 | let a_20_6 = Uniform::new(2., 50.).sample_n(20 * 6); 217 | c.bench_function("20x6 xtx", |b| b.iter(|| xtx(&a_20_6, 20))); 218 | } 219 | 220 | criterion_group!(benches, criterion_vops); 221 | criterion_main!(benches); 222 | -------------------------------------------------------------------------------- /benches/benchmark_statistics.rs: -------------------------------------------------------------------------------- 1 | use compute::distributions::{Distribution1D, Normal}; 2 | use compute::linalg::*; 3 | use compute::statistics::*; 4 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 5 | 6 | pub fn criterion_mean(c: &mut Criterion) { 7 | let v = Normal::new(0., 100.).sample_n(1e6 as usize); 8 | c.bench_function("mean 1e6", |b| b.iter(|| mean(black_box(&v)))); 9 | c.bench_function("welford mean 1e6", |b| { 10 | b.iter(|| welford_mean(black_box(&v))) 11 | }); 12 | } 13 | 14 | pub fn criterion_hist_bins(c: &mut Criterion) { 15 | let bin10 = linspace(0., 10., 10); 16 | let bin100 = linspace(0., 10., 100); 17 | c.bench_function("hist bin edges 10", |b| b.iter(|| hist_bin_centers(&bin10))); 18 | c.bench_function("hist bin edges 100", |b| { 19 | b.iter(|| hist_bin_centers(&bin100)) 20 | }); 21 | } 22 | 23 | criterion_group!(benches, criterion_hist_bins); 24 | criterion_main!(benches); 25 | 
-------------------------------------------------------------------------------- /benches/benchmark_timeseries.rs: -------------------------------------------------------------------------------- 1 | use compute::timeseries::{acf, acovf}; 2 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 3 | 4 | pub fn criterion_tsfuncs(c: &mut Criterion) { 5 | let data: Vec = vec![ 6 | -213.0, -564.0, -35.0, -15.0, 141.0, 115.0, -420.0, -360.0, 203.0, -338.0, -431.0, 194.0, 7 | -220.0, -513.0, 154.0, -125.0, -559.0, 92.0, -21.0, -579.0, -52.0, 99.0, -543.0, -175.0, 8 | 162.0, -457.0, -346.0, 204.0, -300.0, -474.0, 164.0, -107.0, -572.0, -8.0, 83.0, -541.0, 9 | -224.0, 180.0, -420.0, -374.0, 201.0, -236.0, -531.0, 83.0, 27.0, -564.0, -112.0, 131.0, 10 | -507.0, -254.0, 199.0, -311.0, -495.0, 143.0, -46.0, -579.0, -90.0, 136.0, -472.0, -338.0, 11 | 202.0, -287.0, -477.0, 169.0, -124.0, -568.0, 17.0, 48.0, -568.0, -135.0, 162.0, -430.0, 12 | -422.0, 172.0, -74.0, -577.0, -13.0, 92.0, -534.0, -243.0, 194.0, -355.0, -465.0, 156.0, 13 | -81.0, -578.0, -64.0, 139.0, -449.0, -384.0, 193.0, -198.0, -538.0, 110.0, -44.0, -577.0, 14 | -6.0, 66.0, -552.0, -164.0, 161.0, -460.0, -344.0, 205.0, -281.0, -504.0, 134.0, -28.0, 15 | -576.0, -118.0, 156.0, -437.0, -381.0, 200.0, -220.0, -540.0, 83.0, 11.0, -568.0, -160.0, 16 | 172.0, -414.0, -408.0, 188.0, -125.0, -572.0, -32.0, 139.0, -492.0, -321.0, 205.0, -262.0, 17 | -504.0, 142.0, -83.0, -574.0, 0.0, 48.0, -571.0, -106.0, 137.0, -501.0, -266.0, 190.0, 18 | -391.0, -406.0, 194.0, -186.0, -553.0, 83.0, -13.0, -577.0, -49.0, 103.0, -515.0, -280.0, 19 | 201.0, 300.0, -506.0, 131.0, -45.0, -578.0, -80.0, 138.0, -462.0, -361.0, 201.0, -211.0, 20 | -554.0, 32.0, 74.0, -533.0, -235.0, 187.0, -372.0, -442.0, 182.0, -147.0, -566.0, 25.0, 21 | 68.0, -535.0, -244.0, 194.0, -351.0, -463.0, 174.0, -125.0, -570.0, 15.0, 72.0, -550.0, 22 | -190.0, 172.0, -424.0, -385.0, 198.0, -218.0, -536.0, 96.0, 23 | ]; 24 | 
c.bench_function("autocorrelation function up to lag 50", |b| { 25 | b.iter(|| { 26 | (0..50) 27 | .into_iter() 28 | .map(|i| acf(black_box(&data), i)) 29 | .collect::>() 30 | }); 31 | }); 32 | c.bench_function("autocovariance function up to lag 50", |b| { 33 | b.iter(|| { 34 | (0..50) 35 | .into_iter() 36 | .map(|i| acovf(black_box(&data), i)) 37 | .collect::>() 38 | }); 39 | }); 40 | } 41 | 42 | criterion_group!(benches, criterion_tsfuncs); 43 | criterion_main!(benches); 44 | -------------------------------------------------------------------------------- /examples/gp_regression/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "gp_regression" 3 | version = "0.1.0" 4 | edition = "2018" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | compute = "0.1.31" 10 | plotly = "0.6" 11 | -------------------------------------------------------------------------------- /examples/gp_regression/gp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/al-jshen/compute/73ead41fddbe25f70845baa77ece338976b87c21/examples/gp_regression/gp.png -------------------------------------------------------------------------------- /examples/gp_regression/src/main.rs: -------------------------------------------------------------------------------- 1 | use compute::prelude::*; 2 | use plotly::{ 3 | common::{Font, Line, Marker, Mode, Title}, 4 | layout::{Axis, Legend}, 5 | Layout, Plot, Rgb, Rgba, Scatter, 6 | }; 7 | 8 | fn main() { 9 | // variance parameter 10 | let var = 0.05; 11 | // length-scale squared parameter 12 | let lsq = 1.2; 13 | // generate test points 14 | let n = 250; 15 | let xtest = linspace(0., 10., n).to_matrix().reshape(-1, 1); 16 | // similarity of test values 17 | let k_ss = rbfkernel(&xtest, &xtest, var, lsq); 18 | 19 | // 10 randomly sampled noiseless 
training points 20 | let xtrain = Uniform::new(0., 10.).sample_matrix(10, 1); 21 | let ytrain = xtrain.sin(); 22 | 23 | // apply kernel to training points 24 | let kern = rbfkernel(&xtrain, &xtrain, var, lsq); 25 | let l = (&kern + Matrix::eye(xtrain.nrows) * 0.00005).cholesky(); 26 | let (lu, piv) = l.lu(); 27 | 28 | // get mean at test points 29 | let k_s = rbfkernel(&xtrain, &xtest, var, lsq); 30 | let lk = lu.lu_solve(&piv, &k_s); 31 | let mu = lk.t().dot(lu.lu_solve(&piv, &ytrain)).to_vec(); 32 | 33 | // get uncertainty on prediction 34 | let std = (k_ss.diag() - lk.powi(2).sum_cols()).sqrt(); 35 | 36 | // plot 37 | let layout = Layout::new() 38 | .title( 39 | Title::new(r"$\text{Gaussian Process Regression on }f(x) = \sin(x)$".into()) 40 | .font(Font::new().size(30)), 41 | ) 42 | .y_axis(Axis::new().title(Title::new("y"))) 43 | .x_axis(Axis::new().title(Title::new("x"))) 44 | .font(Font::new().size(20)) 45 | .legend(Legend::new().font(Font::new().size(20))); 46 | 47 | let trace1 = Scatter::new(xtrain.to_vec(), ytrain.to_vec()) 48 | .name("Training data") 49 | .mode(Mode::Markers) 50 | .marker(Marker::new().size(15)); 51 | let trace2 = Scatter::new(xtest.to_vec(), xtest.sin().to_vec()) 52 | .name("True function") 53 | .mode(Mode::Lines) 54 | .line(Line::new().dash(plotly::common::DashType::Dot)); 55 | let trace3 = Scatter::new(xtest.to_vec(), mu.clone()) 56 | .name("Mean prediction") 57 | .mode(Mode::Lines) 58 | .line(Line::new().color(Rgb::new(50, 120, 200)).width(3.)); 59 | let trace4 = Scatter::new(xtest.to_vec(), mu.clone() - 2. * std.clone()) 60 | .mode(Mode::Lines) 61 | .show_legend(false) 62 | .line(Line::new().color(Rgb::new(20, 20, 20))); 63 | let trace5 = Scatter::new(xtest.to_vec(), mu.clone() + 2. 
* std.clone()) 64 | .mode(Mode::Lines) 65 | .line(Line::new().color(Rgb::new(20, 20, 20))) 66 | .name("95% confidence interval") 67 | .fill(plotly::common::Fill::ToNextY) 68 | .fill_color(Rgba::new(30, 30, 30, 0.2)); 69 | 70 | let mut plot = Plot::new(); 71 | plot.set_layout(layout); 72 | plot.add_trace(trace4); 73 | plot.add_trace(trace5); 74 | plot.add_trace(trace2); 75 | plot.add_trace(trace1); 76 | plot.add_trace(trace3); 77 | // plot.show_png(1920, 1080); 78 | plot.show(); 79 | } 80 | 81 | fn rbfkernel(a: &Matrix, b: &Matrix, var: f64, lengthsq: f64) -> Matrix { 82 | let sq_dist = a.powi(2).reshape(-1, 1) + b.powi(2).reshape(1, -1) - 2. * a.dot_t(b); 83 | var * (-0.5 * sq_dist / lengthsq).exp() 84 | } 85 | -------------------------------------------------------------------------------- /src/distributions/bernoulli.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::float_cmp)] 2 | use crate::distributions::*; 3 | 4 | /// Implements the [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution). 5 | #[derive(Debug, Clone, Copy)] 6 | pub struct Bernoulli { 7 | /// Probability `p` of the Bernoulli distribution 8 | p: f64, 9 | } 10 | 11 | impl Bernoulli { 12 | /// Create a new Bernoulli distribution with probability `p`. 13 | /// 14 | /// # Errors 15 | /// Panics if p is not in [0, 1]. 16 | pub fn new(p: f64) -> Self { 17 | if !(0. ..=1.).contains(&p) { 18 | panic!("`p` must be in [0, 1]."); 19 | } 20 | Bernoulli { p } 21 | } 22 | pub fn set_p(&mut self, p: f64) -> &mut Self { 23 | if !(0. ..=1.).contains(&p) { 24 | panic!("`p` must be in [0, 1]."); 25 | } 26 | self.p = p; 27 | self 28 | } 29 | } 30 | 31 | impl Default for Bernoulli { 32 | fn default() -> Self { 33 | Self::new(0.5) 34 | } 35 | } 36 | 37 | impl Distribution for Bernoulli { 38 | type Output = f64; 39 | /// Samples from the given Bernoulli distribution. 40 | fn sample(&self) -> f64 { 41 | if self.p == 1. 
{ 42 | return 1.; 43 | } else if self.p == 0. { 44 | return 0.; 45 | } 46 | 47 | if self.p > alea::f64() { 48 | 1. 49 | } else { 50 | 0. 51 | } 52 | } 53 | } 54 | 55 | impl Distribution1D for Bernoulli { 56 | fn update(&mut self, params: &[f64]) { 57 | self.set_p(params[0]); 58 | } 59 | } 60 | 61 | impl Discrete for Bernoulli { 62 | /// Calculates the [probability mass 63 | /// function](https://en.wikipedia.org/wiki/Probability_mass_function) for the given Bernoulli 64 | /// distribution at `x`. 65 | /// 66 | fn pmf(&self, k: i64) -> f64 { 67 | if k == 0 { 68 | 1. - self.p 69 | } else if k == 1 { 70 | self.p 71 | } else { 72 | 0. 73 | } 74 | } 75 | } 76 | 77 | impl Mean for Bernoulli { 78 | type MeanType = f64; 79 | /// Calculates the mean of the Bernoulli distribution, which is `p`. 80 | fn mean(&self) -> f64 { 81 | self.p 82 | } 83 | } 84 | 85 | impl Variance for Bernoulli { 86 | type VarianceType = f64; 87 | /// Calculates the variance, given by `p*q = p(1-p)`. 88 | fn var(&self) -> f64 { 89 | self.p * (1. - self.p) 90 | } 91 | } 92 | 93 | #[cfg(test)] 94 | mod tests { 95 | use super::*; 96 | use crate::statistics::{mean, var}; 97 | use approx_eq::assert_approx_eq; 98 | 99 | #[test] 100 | fn test_bernoulli() { 101 | let data = Bernoulli::new(0.75).sample_n(1e6 as usize); 102 | for i in &data { 103 | assert!(*i == 0. || *i == 1.); 104 | } 105 | assert_approx_eq!(0.75, mean(&data), 1e-2); 106 | assert_approx_eq!(0.75 * 0.25, var(&data), 1e-2); 107 | assert!(Bernoulli::default().pmf(2) == 0.); 108 | assert!(Bernoulli::default().pmf(0) == 0.5); 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/distributions/beta.rs: -------------------------------------------------------------------------------- 1 | use crate::distributions::*; 2 | use crate::functions::beta; 3 | 4 | /// Implements the [Beta](https://en.wikipedia.org/wiki/Beta_distribution) distribution. 
5 | #[derive(Debug, Clone, Copy)] 6 | pub struct Beta { 7 | /// Shape parameter α. 8 | alpha: f64, 9 | /// Shape parameter β. 10 | beta: f64, 11 | /// Gamma(alpha, 1) distribution used to sample gamma variables for the creation of beta 12 | /// variables. 13 | alpha_gen: Gamma, 14 | /// Gamma(beta, 1) distribution used to sample gamma variables for the creation of beta 15 | /// variables. 16 | beta_gen: Gamma, 17 | } 18 | 19 | impl Beta { 20 | /// Create a new Beta distribution with parameters `alpha` and `beta`. 21 | /// 22 | /// # Errors 23 | /// Panics if `alpha <= 0` or `beta <= 0`. 24 | pub fn new(alpha: f64, beta: f64) -> Self { 25 | if alpha <= 0. || beta <= 0. { 26 | panic!("Both alpha and beta must be positive."); 27 | } 28 | Beta { 29 | alpha, 30 | beta, 31 | alpha_gen: Gamma::new(alpha, 1.), 32 | beta_gen: Gamma::new(beta, 1.), 33 | } 34 | } 35 | pub fn set_alpha(&mut self, alpha: f64) -> &mut Self { 36 | if alpha <= 0. { 37 | panic!("Alpha must be positive."); 38 | } 39 | self.alpha = alpha; 40 | self.alpha_gen = Gamma::new(alpha, 1.); 41 | self 42 | } 43 | pub fn set_beta(&mut self, beta: f64) -> &mut Self { 44 | if beta <= 0. { 45 | panic!("Beta must be positive."); 46 | } 47 | self.beta = beta; 48 | self.beta_gen = Gamma::new(beta, 1.); 49 | self 50 | } 51 | } 52 | 53 | impl Default for Beta { 54 | fn default() -> Self { 55 | Self::new(1., 1.) 56 | } 57 | } 58 | 59 | impl Distribution for Beta { 60 | type Output = f64; 61 | /// Samples from the given Beta distribution using the Gamma distribution. 62 | fn sample(&self) -> f64 { 63 | let x = self.alpha_gen.sample(); 64 | x / (x + self.beta_gen.sample()) 65 | } 66 | } 67 | 68 | impl Distribution1D for Beta { 69 | fn update(&mut self, params: &[f64]) { 70 | self.set_alpha(params[0]).set_beta(params[1]); 71 | } 72 | } 73 | 74 | impl Continuous for Beta { 75 | type PDFType = f64; 76 | /// Calculates the probability density function for the given Beta function at `x`. 
77 | /// 78 | /// # Remarks 79 | /// Returns 0. if x is not in `[0, 1]` 80 | fn pdf(&self, x: f64) -> f64 { 81 | if !(0. ..=1.).contains(&x) { 82 | return 0.; 83 | } 84 | x.powf(self.alpha - 1.) * (1. - x).powf(self.beta - 1.) / beta(self.alpha, self.beta) 85 | } 86 | } 87 | 88 | impl Mean for Beta { 89 | type MeanType = f64; 90 | /// Returns the mean of the beta distribution, which for a B(a, b) 91 | /// distribution is given by `a / (a + b)`. 92 | fn mean(&self) -> f64 { 93 | self.alpha / (self.alpha + self.beta) 94 | } 95 | } 96 | 97 | impl Variance for Beta { 98 | type VarianceType = f64; 99 | /// Returns the variance of the beta distribution. 100 | fn var(&self) -> f64 { 101 | (self.alpha * self.beta) 102 | / ((self.alpha + self.beta).powi(2) * (self.alpha + self.beta + 1.)) 103 | } 104 | } 105 | 106 | #[cfg(test)] 107 | mod tests { 108 | use super::*; 109 | use crate::statistics::{mean, var}; 110 | use approx_eq::assert_approx_eq; 111 | 112 | #[test] 113 | fn test_moments() { 114 | let dist = Beta::new(2., 4.); 115 | let data = dist.sample_n(1e6 as usize); 116 | assert_approx_eq!(dist.mean(), mean(&data), 1e-2); 117 | assert_approx_eq!(dist.var(), var(&data), 1e-2); 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /src/distributions/binomial.rs: -------------------------------------------------------------------------------- 1 | use crate::distributions::*; 2 | use crate::functions::binom_coeff; 3 | 4 | /// Implements the [Binomial](https://en.wikipedia.org/wiki/https://en.wikipedia.org/wiki/Binomial_distribution) 5 | /// distribution with trials `n` and probability of success `p`. 6 | #[derive(Debug, Clone, Copy)] 7 | pub struct Binomial { 8 | n: u64, 9 | p: f64, 10 | } 11 | 12 | impl Binomial { 13 | /// Create a new Binomial distribution with parameters `n` and `p`. 14 | /// 15 | /// # Remarks 16 | /// `n` must be a non-negative integer, and `p` must be in [0, 1]. 
17 | pub fn new(n: u64, p: f64) -> Self { 18 | if !(0. ..=1.).contains(&p) { 19 | panic!("`p` must be in [0, 1]"); 20 | } 21 | Binomial { n, p } 22 | } 23 | pub fn set_n(&mut self, n: u64) -> &mut Self { 24 | self.n = n; 25 | self 26 | } 27 | pub fn set_p(&mut self, p: f64) -> &mut Self { 28 | if !(0. ..=1.).contains(&p) { 29 | panic!("`p` must be in [0, 1]"); 30 | } 31 | self.p = p; 32 | self 33 | } 34 | } 35 | 36 | impl Default for Binomial { 37 | fn default() -> Self { 38 | Self::new(1, 0.5) 39 | } 40 | } 41 | 42 | impl Distribution for Binomial { 43 | type Output = f64; 44 | /// Samples from the given Binomial distribution. For `np <= 30`, this is done with an inversion algorithm. 45 | /// Otherwise, this is done with the BTPE algorithm from Kachitvichyanukul and Schmeiser 1988. 46 | fn sample(&self) -> f64 { 47 | if self.n == 0 || self.p == 0. { 48 | return 0.; 49 | } else if (self.p - 1.).abs() <= f64::EPSILON { 50 | return self.n as f64; 51 | } 52 | 53 | let switch = self.p > 0.5; 54 | let p = if switch { 1. - self.p } else { self.p }; 55 | 56 | let res = if p * self.n as f64 <= 30. { 57 | binomial_inversion(self.n, p) 58 | } else { 59 | binomial_btpe(self.n, p) 60 | }; 61 | 62 | if switch { 63 | (self.n - res) as f64 64 | } else { 65 | res as f64 66 | } 67 | } 68 | } 69 | 70 | impl Distribution1D for Binomial { 71 | fn update(&mut self, params: &[f64]) { 72 | self.set_n(params[0] as u64); 73 | self.set_p(params[1]); 74 | } 75 | } 76 | 77 | pub fn binomial_inversion(n: u64, p: f64) -> u64 { 78 | let s = p / (1. - p); 79 | let a = ((n + 1) as f64) * s; 80 | let mut r = (1. - p).powi(n as i32); 81 | let mut u = alea::f64(); 82 | let mut x: u64 = 0; 83 | while u > r as f64 { 84 | u -= r; 85 | x += 1; 86 | r *= a / (x as f64) - s; 87 | } 88 | x 89 | } 90 | 91 | pub fn binomial_btpe(n: u64, p: f64) -> u64 { 92 | // step 0 93 | let nf = n as f64; 94 | let r = if p <= 0.5 { p } else { 1. - p }; 95 | let q = 1. 
- r; 96 | let nrq = nf * r * q; 97 | let fm = nf * r + r; 98 | let m = fm.floor(); 99 | let p1 = (2.195 * nrq.sqrt() - 4.6 * q).floor() + 0.5; 100 | let xm = m + 0.5; 101 | let xl = xm - p1; 102 | let xr = xm + p1; 103 | let lambda = |x: f64| x * (1. + x / 2.); 104 | let c = 0.134 + 20.5 / (15.3 + m); 105 | let ll = lambda((fm - xl) / (fm - xl * r)); 106 | let lr = lambda((xr - fm) / (xr * q)); 107 | let p2 = p1 * (1. + 2. * c); 108 | let p3 = p2 + c / ll; 109 | let p4 = p3 + c / lr; 110 | let mut y: f64; 111 | 112 | let ugen = Uniform::new(0., p4); 113 | let vgen = Uniform::new(0., 1.); 114 | 115 | loop { 116 | // step 1 117 | let u = ugen.sample(); 118 | let mut v = vgen.sample(); 119 | 120 | // clippy says dont do this 121 | // if !(u > p1) { 122 | 123 | // clippy suggests this, then says dont do this... 124 | // let u_p1_cmp = match u.partial_cmp(&p1) { 125 | // None | Some(std::cmp::Ordering::Equal) | Some(std::cmp::Ordering::Less) => true, 126 | // _ => false, 127 | // }; 128 | // 129 | // if u_p1_cmp { 130 | 131 | if matches!( 132 | u.partial_cmp(&p1), 133 | None | Some(std::cmp::Ordering::Equal) | Some(std::cmp::Ordering::Less) 134 | ) { 135 | y = (xm - p1 * v + u).floor(); 136 | // go to step 6 137 | break; 138 | } 139 | 140 | if matches!( 141 | u.partial_cmp(&p2), 142 | None | Some(std::cmp::Ordering::Equal) | Some(std::cmp::Ordering::Less) 143 | ) { 144 | // step 2 145 | let x = xl + (u - p1) / c; 146 | v = v * c + 1. - (m - x + 0.5).abs() / p1; 147 | if v > 1. { 148 | // go to step 1 149 | continue; 150 | } else { 151 | y = x.floor(); 152 | // go to step 5 153 | } 154 | } else if matches!( 155 | u.partial_cmp(&p3), 156 | None | Some(std::cmp::Ordering::Equal) | Some(std::cmp::Ordering::Less) 157 | ) { 158 | // step 3 159 | y = (xl + v.ln() / ll).floor(); 160 | if y < 0. 
{ 161 | // go to step 1 162 | continue; 163 | } else { 164 | v *= (u - p2) * ll; 165 | // go to step 5 166 | } 167 | } else { 168 | // step 4 169 | y = (xr - v.ln() / lr).floor(); 170 | if y > nf { 171 | // go to step 1 172 | continue; 173 | } else { 174 | v *= (u - p3) * lr; 175 | // go to step 5 176 | } 177 | } 178 | 179 | // step 5.0 180 | let k = (y - m).abs(); 181 | if !(k > 20. && k < 0.5 * (nrq) - 1.) { 182 | // step 5.1 183 | let s = p / q; 184 | let a = s * (n as f64 + 1.); 185 | let mut f = 1.; 186 | 187 | if m < y { 188 | let mut i = m; 189 | loop { 190 | i += 1.; 191 | f *= (a / i) - s; 192 | if (i - y).abs() < f64::EPSILON { 193 | break; 194 | } 195 | } 196 | } else if m > y { 197 | let mut i = y; 198 | loop { 199 | i += 1.; 200 | f /= (a / i) - s; 201 | if (i - m).abs() < f64::EPSILON { 202 | break; 203 | } 204 | } 205 | } 206 | if v > f { 207 | // go to step 1 208 | continue; 209 | } else { 210 | // go to step 6 211 | break; 212 | } 213 | } 214 | 215 | // step 5.2 216 | let rho = (k / nrq) * ((k * (k / 3. + 0.625) + 1. / 6.) / nrq + 0.5); 217 | let t = -k * k / (2. * nrq); 218 | let biga = v.ln(); 219 | if biga < t - rho { 220 | // go to step 6 221 | break; 222 | } 223 | if biga > t + rho { 224 | // go to step 1 225 | continue; 226 | } 227 | 228 | // step 5.3 229 | let x1 = y + 1.; 230 | let f1 = m + 1.; 231 | let z = nf + 1. - m; 232 | let w = nf - y + 1.; 233 | 234 | let st = |x: f64| { 235 | (13860. - (462. - (132. - (99. - 140. / (x * x)) / (x * x)) / (x * x)) / (x * x)) 236 | / x 237 | / 166320. 
238 | }; 239 | 240 | if biga 241 | > xm * (f1 / x1).ln() 242 | + (nf - m + 0.5) * (z / w).ln() 243 | + (y - m) * (w * r / (x1 * q)).ln() 244 | + st(f1) 245 | + st(z) 246 | + st(x1) 247 | + st(w) 248 | { 249 | // go to step 1 250 | continue; 251 | } 252 | // go to step 6 253 | break; 254 | } 255 | 256 | // step 6 257 | if p > 0.5 { 258 | y = nf - y; 259 | } 260 | 261 | y as u64 262 | } 263 | 264 | impl Discrete for Binomial { 265 | /// Calculates the [probability mass 266 | /// function](https://en.wikipedia.org/wiki/Probability_mass_function) for the given Binomial 267 | /// distribution at `k`. 268 | /// 269 | fn pmf(&self, k: i64) -> f64 { 270 | binom_coeff(self.n, k as u64) as f64 271 | * self.p.powi(k as i32) 272 | * (1. - self.p).powi((self.n - k as u64) as i32) 273 | } 274 | } 275 | 276 | impl Mean for Binomial { 277 | type MeanType = f64; 278 | /// Calculates the mean, which is given by `np`. 279 | fn mean(&self) -> f64 { 280 | self.n as f64 * self.p 281 | } 282 | } 283 | 284 | impl Variance for Binomial { 285 | type VarianceType = f64; 286 | /// Calculates the variance, which is given by `npq`, where `q = 1-p` 287 | fn var(&self) -> f64 { 288 | self.n as f64 * self.p * (1. 
- self.p)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::statistics::{mean, var};
    use approx_eq::assert_approx_eq;

    #[test]
    fn test_moments() {
        let distr1 = Binomial::new(15, 0.3);
        let data1 = distr1.sample_n(1e6 as usize);
        let mean1 = mean(&data1);
        let var1 = var(&data1);
        assert_approx_eq!(mean1, 4.5, 1e-2);
        assert_approx_eq!(var1, 3.15, 1e-2);

        let distr2 = Binomial::new(70, 0.5);
        let data2 = distr2.sample_n(1e6 as usize);
        let mean2 = mean(&data2);
        let var2 = var(&data2);
        assert_approx_eq!(mean2, 35., 1e-2);
        assert_approx_eq!(var2, 17.5, 1e-2);
    }
}
--------------------------------------------------------------------------------
/src/distributions/chi_squared.rs:
--------------------------------------------------------------------------------
use crate::distributions::*;
use crate::functions::gamma;

/// Implements the [Chi square](https://en.wikipedia.org/wiki/Chi-square_distribution) distribution.
#[derive(Debug, Clone, Copy)]
pub struct ChiSquared {
    /// Degrees of freedom (k)
    dof: usize,
    /// Internal Gamma(k/2, 1/2) sampler: a chi-squared variable with k degrees
    /// of freedom is distributed as Gamma(shape = k/2, rate = 1/2).
    sampler: Gamma,
}

impl ChiSquared {
    /// Create a new Chi square distribution with `dof` degrees of freedom.
    ///
    /// # Errors
    /// Panics if degrees of freedom is not positive.
    pub fn new(dof: usize) -> Self {
        assert!(dof > 0, "Degrees of freedom must be positive.");
        ChiSquared {
            dof,
            sampler: Gamma::new((dof as f64) / 2., 0.5),
        }
    }
    /// Change the degrees of freedom of the distribution.
    ///
    /// # Errors
    /// Panics if degrees of freedom is not positive.
    pub fn set_dof(&mut self, dof: usize) -> &mut Self {
        assert!(dof > 0, "Degrees of freedom must be positive.");
        self.dof = dof;
        // Rebuild the internal Gamma sampler so it tracks the new degrees of
        // freedom. Previously only `dof` was updated, so `sample()` kept
        // drawing from the old Gamma(old_dof/2, 1/2) distribution after an
        // `update()`/`set_dof` call (cf. Beta::set_alpha, which refreshes
        // its sampler).
        self.sampler = Gamma::new((dof as f64) / 2., 0.5);
        self
    }
}

impl Default for ChiSquared {
    fn default() -> Self {
        Self::new(1)
    }
}

impl Distribution for ChiSquared {
    type Output = f64;
    /// Samples from the given Chi square distribution.
40 | fn sample(&self) -> f64 { 41 | self.sampler.sample() 42 | } 43 | } 44 | 45 | impl Distribution1D for ChiSquared { 46 | fn update(&mut self, params: &[f64]) { 47 | self.set_dof(params[0] as usize); 48 | } 49 | } 50 | 51 | impl Continuous for ChiSquared { 52 | type PDFType = f64; 53 | /// Calculates the probability density function for the given Chi square distribution at `x`. 54 | /// 55 | /// # Remarks 56 | /// If `dof = 1` then x should be positive. Otherwise, x should be non-negative. If these 57 | /// conditions are not met, then the probability of x is 0. 58 | fn pdf(&self, x: f64) -> f64 { 59 | if (self.dof == 1 && x <= 0.) || (x < 0.) { 60 | return 0.; 61 | } 62 | let half_k = (self.dof as f64) / 2.; 63 | 1. / (2_f64.powf(half_k) * gamma(half_k)) * x.powf(half_k - 1.) * (-x / 2.).exp() 64 | } 65 | } 66 | 67 | impl Mean for ChiSquared { 68 | type MeanType = f64; 69 | /// Calculates the mean of the Chi square distribution, which is the same as its degrees of 70 | /// freedom. 71 | fn mean(&self) -> f64 { 72 | self.dof as f64 73 | } 74 | } 75 | 76 | impl Variance for ChiSquared { 77 | type VarianceType = f64; 78 | /// Calculates the variance of the Chi square distribution. 79 | fn var(&self) -> f64 { 80 | self.mean() * 2. 
81 | } 82 | } 83 | 84 | #[cfg(test)] 85 | mod tests { 86 | 87 | use super::*; 88 | use crate::statistics::{mean, var}; 89 | use approx_eq::assert_approx_eq; 90 | 91 | #[test] 92 | fn test_moments() { 93 | let data1 = ChiSquared::new(2).sample_n(1e6 as usize); 94 | assert_approx_eq!(2., mean(&data1), 1e-2); 95 | assert_approx_eq!(4., var(&data1), 1e-2); 96 | 97 | let data2 = ChiSquared::new(5).sample_n(1e6 as usize); 98 | assert_approx_eq!(5., mean(&data2), 1e-2); 99 | assert_approx_eq!(10., var(&data2), 1e-2); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/distributions/discreteuniform.rs: -------------------------------------------------------------------------------- 1 | use crate::distributions::*; 2 | 3 | /// Implements the [discrete uniform distribution](https://en.wikipedia.org/wiki/Discrete_uniform_distribution). 4 | #[derive(Debug, Clone, Copy)] 5 | pub struct DiscreteUniform { 6 | /// Lower bound for the discrete uniform distribution. 7 | lower: i64, 8 | /// Upper bound for the discrete uniform distribution. 9 | upper: i64, 10 | } 11 | 12 | impl DiscreteUniform { 13 | /// Create a new discrete uniform distribution with lower bound `lower` and upper bound `upper` (inclusive on both ends). 14 | /// 15 | /// # Errors 16 | /// Panics if `lower > upper`. 
pub fn new(lower: i64, upper: i64) -> Self {
        if lower > upper {
            // Message fixed: equal bounds are accepted, so the requirement is
            // `upper >= lower`, not strictly larger.
            panic!("`upper` must be greater than or equal to `lower`.");
        }
        DiscreteUniform { lower, upper }
    }
    /// Change the (inclusive) lower bound.
    ///
    /// # Errors
    /// Panics if the new lower bound exceeds the current upper bound.
    pub fn set_lower(&mut self, lower: i64) -> &mut Self {
        if lower > self.upper {
            panic!("`upper` must be greater than or equal to `lower`.")
        }
        self.lower = lower;
        self
    }
    /// Change the (inclusive) upper bound.
    ///
    /// # Errors
    /// Panics if the new upper bound is below the current lower bound.
    pub fn set_upper(&mut self, upper: i64) -> &mut Self {
        if self.lower > upper {
            panic!("`upper` must be greater than or equal to `lower`.")
        }
        self.upper = upper;
        self
    }
}

impl Default for DiscreteUniform {
    fn default() -> Self {
        Self::new(0, 1)
    }
}

impl Distribution for DiscreteUniform {
    type Output = f64;
    /// Samples from the given discrete uniform distribution.
    fn sample(&self) -> f64 {
        alea::i64_in_range(self.lower, self.upper) as f64
    }
}

impl Distribution1D for DiscreteUniform {
    fn update(&mut self, params: &[f64]) {
        self.set_lower(params[0] as i64).set_upper(params[1] as i64);
    }
}

impl Discrete for DiscreteUniform {
    /// Calculates the [probability mass
    /// function](https://en.wikipedia.org/wiki/Probability_mass_function) for the given discrete uniform
    /// distribution at `x`.
    ///
    /// # Remarks
    ///
    /// Returns `0.` if `x` is not in `[lower, upper]`
    fn pmf(&self, x: i64) -> f64 {
        if x < self.lower || x > self.upper {
            0.
        } else {
            1. / (self.upper - self.lower + 1) as f64
        }
    }
}

impl Mean for DiscreteUniform {
    type MeanType = f64;
    /// Calculates the mean, which for a DiscreteUniform(a, b) distribution is given by `(a + b) / 2`.
    fn mean(&self) -> f64 {
        // Convert before dividing: the previous `((lower + upper) / 2) as f64`
        // performed truncating integer division, e.g. reporting 0.0 instead of
        // 0.5 for DiscreteUniform(0, 1).
        (self.lower + self.upper) as f64 / 2.
    }
}

impl Variance for DiscreteUniform {
    type VarianceType = f64;
    /// Calculates the variance of the given Uniform distribution.
87 | fn var(&self) -> f64 { 88 | (((self.upper - self.lower + 1) as f64).powi(2) - 1.) / 12. 89 | } 90 | } 91 | 92 | #[test] 93 | fn inrange() { 94 | let u = self::DiscreteUniform::new(-2, 6); 95 | let samples = u.sample_n(100); 96 | samples.into_iter().for_each(|x| { 97 | assert!(-2. <= x); 98 | assert!(x <= 6.); 99 | }) 100 | } 101 | -------------------------------------------------------------------------------- /src/distributions/exponential.rs: -------------------------------------------------------------------------------- 1 | use crate::distributions::*; 2 | 3 | /// Implements the [Exponential](https://en.wikipedia.org/wiki/Exponential_distribution) 4 | /// distribution. 5 | #[derive(Debug, Clone, Copy)] 6 | pub struct Exponential { 7 | /// Rate parameter λ 8 | lambda: f64, 9 | /// Random number generator used to sample from the distribution. Uses a Uniform distribution 10 | /// in order to perform inverse transform sampling. 11 | rng: Uniform, 12 | } 13 | 14 | impl Exponential { 15 | /// Create a new Exponential distribution with rate parameter `lambda`. 16 | /// 17 | /// # Errors 18 | /// Panics if `lambda <= 0`. 19 | pub fn new(lambda: f64) -> Self { 20 | if lambda <= 0. { 21 | panic!("Lambda must be positive."); 22 | } 23 | Exponential { 24 | lambda, 25 | rng: Uniform::new(0., 1.), 26 | } 27 | } 28 | pub fn set_lambda(&mut self, lambda: f64) -> &mut Self { 29 | if lambda <= 0. { 30 | panic!("Lambda must be positive.") 31 | } 32 | self.lambda = lambda; 33 | self 34 | } 35 | } 36 | 37 | impl Default for Exponential { 38 | fn default() -> Self { 39 | Self::new(1.) 40 | } 41 | } 42 | 43 | impl Distribution for Exponential { 44 | type Output = f64; 45 | /// Samples from the given Exponential distribution. 46 | /// 47 | /// # Remarks 48 | /// Uses the [inverse transform 49 | /// sampling](https://en.wikipedia.org/wiki/Inverse_transform_sampling) method. 
50 | fn sample(&self) -> f64 { 51 | -self.rng.sample().ln() / self.lambda 52 | } 53 | } 54 | 55 | impl Distribution1D for Exponential { 56 | fn update(&mut self, params: &[f64]) { 57 | self.set_lambda(params[0]); 58 | } 59 | } 60 | 61 | impl Continuous for Exponential { 62 | type PDFType = f64; 63 | /// Calculates the [probability density 64 | /// function](https://en.wikipedia.org/wiki/Probability_density_function) for the given Exponential 65 | /// distribution at `x`. 66 | /// 67 | /// # Remarks 68 | /// 69 | /// Returns `0.` if `x` is negative. 70 | fn pdf(&self, x: f64) -> f64 { 71 | if x < 0. { 72 | return 0.; 73 | } 74 | self.lambda * (-self.lambda * x).exp() 75 | } 76 | } 77 | 78 | impl Mean for Exponential { 79 | type MeanType = f64; 80 | /// Returns the mean of the given exponential distribution. 81 | fn mean(&self) -> f64 { 82 | 1. / self.lambda 83 | } 84 | } 85 | 86 | impl Variance for Exponential { 87 | type VarianceType = f64; 88 | fn var(&self) -> f64 { 89 | 1. / self.lambda.powi(2) 90 | } 91 | } 92 | 93 | #[cfg(test)] 94 | mod tests { 95 | 96 | use super::*; 97 | use crate::statistics::{mean, var}; 98 | use approx_eq::assert_approx_eq; 99 | 100 | #[test] 101 | fn test_moments() { 102 | let data2 = Exponential::new(5.).sample_n(1e6 as usize); 103 | assert_approx_eq!(1. / 5., mean(&data2), 1e-2); 104 | assert_approx_eq!(1. / 25., var(&data2), 1e-2); 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/distributions/gamma.rs: -------------------------------------------------------------------------------- 1 | use crate::distributions::*; 2 | use crate::functions::gamma; 3 | 4 | /// Implements the [Gamma](https://en.wikipedia.org/wiki/Gamma_distribution) distribution. 5 | #[derive(Debug, Clone, Copy)] 6 | pub struct Gamma { 7 | /// Shape parameter α. 8 | alpha: f64, 9 | /// Rate parameter β. 
10 | beta: f64, 11 | normal_gen: Normal, 12 | uniform_gen: Uniform, 13 | } 14 | 15 | impl Gamma { 16 | /// Create a new Gamma distribution with shape `alpha` and rate `beta`. 17 | /// 18 | /// # Errors 19 | /// Panics if `alpha <= 0` or `beta <= 0`. 20 | pub fn new(alpha: f64, beta: f64) -> Self { 21 | if alpha <= 0. || beta <= 0. { 22 | panic!("Both alpha and beta must be positive."); 23 | } 24 | Gamma { 25 | alpha, 26 | beta, 27 | normal_gen: Normal::new(0., 1.), 28 | uniform_gen: Uniform::new(0., 1.), 29 | } 30 | } 31 | pub fn set_alpha(&mut self, alpha: f64) -> &mut Self { 32 | if alpha <= 0. { 33 | panic!("Alpha must be positive."); 34 | } 35 | self.alpha = alpha; 36 | self 37 | } 38 | pub fn set_beta(&mut self, beta: f64) -> &mut Self { 39 | if beta <= 0. { 40 | panic!("Beta must be positive."); 41 | } 42 | self.beta = beta; 43 | self 44 | } 45 | } 46 | 47 | impl Default for Gamma { 48 | fn default() -> Self { 49 | Self::new(1., 1.) 50 | } 51 | } 52 | 53 | impl Distribution for Gamma { 54 | type Output = f64; 55 | /// Samples from the given Gamma distribution. 56 | /// 57 | /// # Remarks 58 | /// Uses the algorithm from Marsaglia and Tsang 2000. Applies the squeeze 59 | /// method and has nearly constant average time for `alpha >= 1`. 60 | fn sample(&self) -> f64 { 61 | let d = self.alpha - 1. / 3.; 62 | loop { 63 | let (x, v) = loop { 64 | let x = self.normal_gen.sample(); 65 | let v = (1. + x / (9. * d).sqrt()).powi(3); 66 | if v > 0. { 67 | break (x, v); 68 | } 69 | }; 70 | let u = self.uniform_gen.sample(); 71 | if u < 1. - 0.0331 * x.powi(4) { 72 | return d * v / self.beta; 73 | } 74 | if u.ln() < 0.5 * x.powi(2) + d * (1. 
- v + v.ln()) {
                return d * v / self.beta;
            }
        }
    }
}

impl Distribution1D for Gamma {
    fn update(&mut self, params: &[f64]) {
        self.set_alpha(params[0]).set_beta(params[1]);
    }
}

impl Continuous for Gamma {
    type PDFType = f64;
    /// Calculates the probability density function for the given Gamma function at `x`.
    ///
    /// # Remarks
    /// x should be positive. Returns `0.` for `x <= 0`.
    fn pdf(&self, x: f64) -> f64 {
        if x <= 0. {
            return 0.;
        }
        // Gamma(α, β) density: β^α / Γ(α) · x^(α−1) · e^(−βx).
        self.beta.powf(self.alpha) / gamma(self.alpha)
            * x.powf(self.alpha - 1.)
            * (-self.beta * x).exp()
    }
}

impl Mean for Gamma {
    type MeanType = f64;
    /// Calculates the mean, which for a Gamma(a, b) distribution is given by `a / b`.
    fn mean(&self) -> f64 {
        self.alpha / self.beta
    }
}

impl Variance for Gamma {
    type VarianceType = f64;
    /// Calculates the variance of the given Gamma distribution, `a / b^2`.
    fn var(&self) -> f64 {
        self.alpha / self.beta.powi(2)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::statistics::{mean, var};
    use approx_eq::assert_approx_eq;

    #[test]
    fn test_moments() {
        let data = Gamma::new(2., 4.).sample_n(1e6 as usize);
        assert_approx_eq!(0.5, mean(&data), 1e-2);
        assert_approx_eq!(0.125, var(&data), 1e-2);
    }
}
--------------------------------------------------------------------------------
/src/distributions/gumbel.rs:
--------------------------------------------------------------------------------
use crate::distributions::*;

const EULER_MASCHERONI: f64 = 0.577215664901532860606512090082402431042159335939923598805767234884867726777664670936947063291746749;

// π²/6 ≈ 1.6449: the Gumbel variance is (π²/6)·β². This constant was previously
// defined as just π, which made `Variance::var` report π·β² instead of (π²/6)·β².
const PISQ6: f64 = std::f64::consts::PI * std::f64::consts::PI / 6.;

/// Implements the [Gumbel](https://en.wikipedia.org/wiki/Gumbel_distribution) distribution.
7 | #[derive(Debug, Clone, Copy)] 8 | pub struct Gumbel { 9 | /// Shape parameter μ. 10 | mu: f64, 11 | /// Rate parameter β. 12 | beta: f64, 13 | uniform_gen: Uniform, 14 | } 15 | 16 | impl Gumbel { 17 | /// Create a new Gumbel distribution with location `mu` and scale `beta`. 18 | /// 19 | /// # Errors 20 | /// Panics if `beta <= 0`. 21 | pub fn new(mu: f64, beta: f64) -> Self { 22 | if beta <= 0. { 23 | panic!("Beta must be positive."); 24 | } 25 | Gumbel { 26 | mu, 27 | beta, 28 | uniform_gen: Uniform::new(0., 1.), 29 | } 30 | } 31 | pub fn set_mu(&mut self, mu: f64) -> &mut Self { 32 | self.mu = mu; 33 | self 34 | } 35 | pub fn set_beta(&mut self, beta: f64) -> &mut Self { 36 | if beta <= 0. { 37 | panic!("Beta must be positive."); 38 | } 39 | self.beta = beta; 40 | self 41 | } 42 | } 43 | 44 | impl Default for Gumbel { 45 | fn default() -> Self { 46 | Self::new(0., 1.) 47 | } 48 | } 49 | 50 | impl Distribution for Gumbel { 51 | type Output = f64; 52 | /// Samples from the given Gumbel distribution. 53 | fn sample(&self) -> Self::Output { 54 | self.mu - self.beta * (-self.uniform_gen.sample().ln()).ln() 55 | } 56 | } 57 | 58 | impl Distribution1D for Gumbel { 59 | fn update(&mut self, params: &[f64]) { 60 | self.set_mu(params[0]).set_beta(params[1]); 61 | } 62 | } 63 | 64 | impl Continuous for Gumbel { 65 | type PDFType = f64; 66 | /// Calculates the probability density function for the given Gumbel function at `x`. 67 | fn pdf(&self, x: f64) -> Self::PDFType { 68 | let z = (x - self.mu) / self.beta; 69 | 1. / self.beta * (-(z + (-z).exp())).exp() 70 | } 71 | } 72 | 73 | impl Mean for Gumbel { 74 | type MeanType = f64; 75 | /// Calculates the mean of the given Gumbel distribution. 76 | fn mean(&self) -> Self::MeanType { 77 | self.mu + self.beta * EULER_MASCHERONI 78 | } 79 | } 80 | 81 | impl Variance for Gumbel { 82 | type VarianceType = f64; 83 | /// Calculates the variance of the given Gumbel distribution. 
84 | fn var(&self) -> Self::VarianceType { 85 | PISQ6 * self.beta.powi(2) 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/distributions/mod.rs: -------------------------------------------------------------------------------- 1 | //! Provides a unified interface for working with probability distributions. Also implements 2 | //! commonly used (maximum entropy) distributions. 3 | 4 | mod bernoulli; 5 | mod beta; 6 | mod binomial; 7 | mod chi_squared; 8 | mod discreteuniform; 9 | mod exponential; 10 | mod gamma; 11 | mod gumbel; 12 | // mod laplace; 13 | mod multivariatenormal; 14 | mod normal; 15 | mod pareto; 16 | mod poisson; 17 | mod t; 18 | mod uniform; 19 | 20 | use crate::linalg::{Matrix, Vector}; 21 | 22 | /// The primary trait defining a probability distribution. 23 | pub trait Distribution: Send + Sync { 24 | type Output; 25 | /// Samples from the given probability distribution. 26 | fn sample(&self) -> Self::Output; 27 | } 28 | 29 | /// A trait defining a one dimensional distribution. 30 | pub trait Distribution1D: Distribution { 31 | /// Generates a vector of `n` randomly sampled values from the given probability distribution. 32 | fn sample_n(&self, n: usize) -> Vector { 33 | (0..n).map(|_| self.sample()).collect() 34 | } 35 | /// Generates a matrix of size `n x m` with values randomly sampled from the given 36 | /// distribution. 37 | fn sample_matrix(&self, nrows: usize, ncols: usize) -> Matrix { 38 | Matrix::new(self.sample_n(nrows * ncols), nrows as i32, ncols as i32) 39 | } 40 | /// Update the parameters of the distribution. 41 | fn update(&mut self, params: &[f64]); 42 | } 43 | 44 | /// A trait defining a multidimensional probability distribution. 45 | pub trait DistributionND: Distribution { 46 | fn get_dim(&self) -> usize; 47 | /// Generate a matrix of samples, where each row in the matrix is a random sample from the 48 | /// given distribution. 
fn sample_n(&self, n: usize) -> Matrix {
        // Preallocate room for all n samples of dimension `get_dim()` up front.
        let mut data = Vector::with_capacity(n * self.get_dim());
        for _ in 0..n {
            data.extend(self.sample());
        }
        // n rows (one per sample), `get_dim()` columns (one per dimension).
        Matrix::new(data, n as i32, self.get_dim() as i32)
    }
}

/// Provides a trait for computing the mean of a distribution where there is a closed-form
/// expression.
pub trait Mean {
    /// The type returned by `mean` (e.g. `f64` for one-dimensional distributions).
    type MeanType;
    /// Calculates the mean of the distribution.
    fn mean(&self) -> Self::MeanType;
}

/// Provides a trait for computing the variance of a distribution where there is a closed-form
/// solution.
pub trait Variance {
    /// The type returned by `var` (e.g. `f64` for one-dimensional distributions).
    type VarianceType;
    /// Calculates the variance of the distribution.
    fn var(&self) -> Self::VarianceType;
}

/// Provides a trait for interacting with continuous probability distributions.
pub trait Continuous {
    /// The argument type accepted by `pdf`/`ln_pdf` (e.g. `f64`, or a slice for
    /// multivariate distributions).
    type PDFType;
    /// Calculates the [probability density
    /// function](https://en.wikipedia.org/wiki/Probability_density_function) at some value `x`.
    fn pdf(&self, x: Self::PDFType) -> f64;
    /// Natural log of the pdf. The default takes `ln` of `pdf`; implementors may
    /// override it with a more numerically stable form.
    fn ln_pdf(&self, x: Self::PDFType) -> f64 {
        self.pdf(x).ln()
    }
}

/// Provides a trait for interacting with discrete probability distributions.
pub trait Discrete: Distribution1D {
    /// Calculates the [probability mass function](https://en.wikipedia.org/wiki/Probability_mass_function) at some value `x`.
87 | fn pmf(&self, x: i64) -> f64; 88 | } 89 | 90 | pub use self::bernoulli::Bernoulli; 91 | pub use self::beta::Beta; 92 | pub use self::binomial::Binomial; 93 | pub use self::chi_squared::ChiSquared; 94 | pub use self::discreteuniform::DiscreteUniform; 95 | pub use self::exponential::Exponential; 96 | pub use self::gamma::Gamma; 97 | pub use self::gumbel::Gumbel; 98 | // pub use self::laplace::Laplace; 99 | pub use self::multivariatenormal::*; 100 | pub use self::normal::Normal; 101 | pub use self::pareto::Pareto; 102 | pub use self::poisson::Poisson; 103 | pub use self::t::*; 104 | pub use self::uniform::Uniform; 105 | -------------------------------------------------------------------------------- /src/distributions/multivariatenormal.rs: -------------------------------------------------------------------------------- 1 | use std::f64::consts::PI; 2 | 3 | use super::{Continuous, Distribution, Distribution1D, DistributionND, Mean, Normal, Variance}; 4 | use crate::prelude::{Dot, Matrix, Vector}; 5 | 6 | /// [Multivariate normal distribution](https://en.wikipedia.org/wiki/Multivariate_normal_distribution). 7 | #[derive(Debug, Clone)] 8 | pub struct MVN { 9 | mean: Vector, 10 | covariance_matrix: Matrix, 11 | inverse_covariance_matrix: Matrix, 12 | covariance_determinant: f64, 13 | decomposed_covariance_matrix: Matrix, 14 | } 15 | 16 | pub type MultivariateNormal = MVN; 17 | 18 | impl MVN { 19 | pub fn new(mean: V, covariance_matrix: M) -> Self 20 | where 21 | V: Into, 22 | M: Into, 23 | { 24 | let m = mean.into(); 25 | let c = covariance_matrix.into(); 26 | 27 | assert!(c.is_symmetric(), "covariance matrix must be symmetric"); 28 | assert_eq!( 29 | m.len(), 30 | c.ncols, 31 | "mean vector and covariance matrix must have the same dimensions" 32 | ); 33 | 34 | // don't really want to compute these if not necessary but if you make these option<..> and 35 | // compute only when necessary it gets kind of nasty because you need &mut self for e.g. 
36 | // the pdf method which requires the trait definition to be changed. will just eat the cost 37 | // for now and figure something else out. still better than not caching. 38 | let l = (&c).cholesky(); 39 | let cinv = (&c).inv(); 40 | let cdet = (&c).det(); 41 | 42 | Self { 43 | mean: m, 44 | covariance_matrix: c, 45 | inverse_covariance_matrix: cinv, 46 | covariance_determinant: cdet, 47 | decomposed_covariance_matrix: l, 48 | } 49 | } 50 | } 51 | 52 | impl Distribution for MVN { 53 | type Output = Vector; 54 | fn sample(&self) -> Vector { 55 | let z = Normal::default().sample_n(self.mean.len()); 56 | &self.mean + self.decomposed_covariance_matrix.dot(z) 57 | } 58 | } 59 | 60 | impl DistributionND for MVN { 61 | fn get_dim(&self) -> usize { 62 | self.mean.len() 63 | } 64 | } 65 | 66 | impl<'a> Continuous for &'a MVN { 67 | type PDFType = &'a [f64]; 68 | fn pdf(&self, x: Self::PDFType) -> f64 { 69 | assert!(self.covariance_matrix.is_positive_definite()); 70 | assert_eq!(x.len(), self.mean.len()); 71 | 72 | let x_minus_mu: Vector = x 73 | .iter() 74 | .enumerate() 75 | .map(|(i, v)| v - self.mean[i]) 76 | .collect(); 77 | 78 | let numerator = 79 | (-0.5 * x_minus_mu.t_dot(&self.inverse_covariance_matrix.dot(&x_minus_mu))).exp(); 80 | let denominator = ((2. * PI).powi(x.len() as i32) * self.covariance_determinant).sqrt(); 81 | 82 | numerator / denominator 83 | } 84 | 85 | fn ln_pdf(&self, x: Self::PDFType) -> f64 { 86 | assert!(self.covariance_matrix.is_positive_definite()); 87 | assert_eq!(x.len(), self.mean.len()); 88 | 89 | let x_minus_mu: Vector = x 90 | .iter() 91 | .enumerate() 92 | .map(|(i, v)| v - self.mean[i]) 93 | .collect(); 94 | 95 | -0.5 * (self.covariance_determinant.ln() 96 | + x_minus_mu.t_dot(&self.inverse_covariance_matrix.dot(&x_minus_mu)) 97 | + x.len() as f64 * (2. 
* PI).ln()) 98 | } 99 | } 100 | 101 | impl<'a> Mean for &'a MVN { 102 | type MeanType = &'a [f64]; 103 | fn mean(&self) -> Self::MeanType { 104 | &self.mean 105 | } 106 | } 107 | 108 | impl<'a> Variance for &'a MVN { 109 | type VarianceType = &'a Matrix; 110 | fn var(&self) -> Self::VarianceType { 111 | &self.covariance_matrix 112 | } 113 | } 114 | 115 | #[cfg(test)] 116 | mod tests { 117 | use super::Continuous; 118 | use super::*; 119 | use approx_eq::assert_approx_eq; 120 | 121 | #[test] 122 | fn test_mvn_pdf() { 123 | let mu = Vector::new([ 124 | 0.6971976638355714, 125 | -0.6676833280983583, 126 | -2.0192124253834733, 127 | -1.5335621337312673, 128 | ]); 129 | 130 | let cov = Matrix::new( 131 | [ 132 | 2.247288887309859, 133 | 0.2995972155043716, 134 | 0.5845592696474896, 135 | -0.13434631148751136, 136 | 0.2995972155043716, 137 | 1.3959897541030757, 138 | -0.1601386729230161, 139 | 2.2253865738659315, 140 | 0.5845592696474896, 141 | -0.1601386729230161, 142 | 3.977276244924999, 143 | -1.977313729867125, 144 | -0.13434631148751136, 145 | 2.2253865738659315, 146 | -1.977313729867125, 147 | 8.06177161880807, 148 | ], 149 | 4, 150 | 4, 151 | ); 152 | 153 | let mvn = MVN::new(mu, cov); 154 | 155 | let x1 = Vector::new([ 156 | 0.050102652382139026, 157 | -0.0521232079055611, 158 | 0.6617157383972537, 159 | -0.8086304981120899, 160 | ]); 161 | let x2 = Vector::new([ 162 | 0.8416518707855118, 163 | -1.1531229014478865, 164 | 1.7008635367302818, 165 | -0.6559951109477243, 166 | ]); 167 | let x3 = Vector::new([ 168 | 0.6545389674230797, 169 | 1.739584646246535, 170 | -0.4158677788241667, 171 | 1.2753434275913207, 172 | ]); 173 | 174 | assert_approx_eq!(0.0008500500589160902, (&mvn).pdf(&x1)); 175 | assert_approx_eq!(0.0001612231592518467, (&mvn).pdf(&x2)); 176 | assert_approx_eq!(0.00025701999301292773, (&mvn).pdf(&x3)); 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/distributions/pareto.rs: 
-------------------------------------------------------------------------------- 1 | use crate::distributions::*; 2 | 3 | /// Implements the [Pareto](https://en.wikipedia.org/wiki/Pareto_distribution) distribution. 4 | #[derive(Debug, Clone, Copy)] 5 | pub struct Pareto { 6 | /// Shape parameter α. 7 | alpha: f64, 8 | /// Parameter which controls the minimum value of the distribution. 9 | minval: f64, 10 | } 11 | 12 | impl Pareto { 13 | /// Create a new Pareto distribution with shape `alpha` and minimum value `minval`. 14 | /// 15 | /// # Errors 16 | /// Panics if `alpha <= 0` or `minval <= 0`. 17 | pub fn new(alpha: f64, minval: f64) -> Self { 18 | if alpha <= 0. || minval <= 0. { 19 | panic!("Both alpha and beta must be positive."); 20 | } 21 | Pareto { alpha, minval } 22 | } 23 | pub fn set_alpha(&mut self, alpha: f64) -> &mut Self { 24 | if alpha <= 0. { 25 | panic!("Alpha must be positive."); 26 | } 27 | self.alpha = alpha; 28 | self 29 | } 30 | pub fn set_minval(&mut self, minval: f64) -> &mut Self { 31 | if minval <= 0. { 32 | panic!("minval must be positive."); 33 | } 34 | self.minval = minval; 35 | self 36 | } 37 | } 38 | 39 | impl Default for Pareto { 40 | fn default() -> Self { 41 | Self::new(1., 1.) 42 | } 43 | } 44 | 45 | impl Distribution for Pareto { 46 | type Output = f64; 47 | /// Samples from the given Pareto distribution using inverse transform sampling. 48 | fn sample(&self) -> f64 { 49 | let u = alea::f64(); 50 | self.minval / u.powf(1. / self.alpha) 51 | } 52 | } 53 | 54 | impl Distribution1D for Pareto { 55 | fn update(&mut self, params: &[f64]) { 56 | assert!(params.len() == 2); 57 | self.set_alpha(params[0]).set_minval(params[1]); 58 | } 59 | } 60 | 61 | impl Continuous for Pareto { 62 | type PDFType = f64; 63 | /// Calculates the probability density function for the given Pareto function at `x`. 
64 | /// 65 | /// # Remarks 66 | /// This returns 0 if `x < minval` 67 | fn pdf(&self, x: f64) -> f64 { 68 | if x < self.minval { 69 | return 0.; 70 | } 71 | self.alpha * self.minval.powf(self.alpha) / x.powf(self.alpha - 1.) 72 | } 73 | } 74 | 75 | impl Mean for Pareto { 76 | type MeanType = f64; 77 | /// Calculates the mean of the Pareto distribution. 78 | fn mean(&self) -> f64 { 79 | if self.alpha <= 1. { 80 | f64::INFINITY 81 | } else { 82 | self.alpha * self.minval / (self.alpha - 1.) 83 | } 84 | } 85 | } 86 | 87 | impl Variance for Pareto { 88 | type VarianceType = f64; 89 | /// Calculates the variance of the Pareto distribution. 90 | fn var(&self) -> f64 { 91 | if self.alpha <= 2. { 92 | f64::INFINITY 93 | } else { 94 | self.minval.powi(2) * self.alpha / ((self.alpha - 1.).powi(2) * (self.alpha - 2.)) 95 | } 96 | } 97 | } 98 | 99 | #[cfg(test)] 100 | mod tests { 101 | use super::*; 102 | use crate::statistics::{mean, var}; 103 | use approx_eq::assert_approx_eq; 104 | 105 | #[test] 106 | fn test_moments() { 107 | let dist = Pareto::new(4., 4.); 108 | let data = dist.sample_n(1e6 as usize); 109 | assert_approx_eq!(dist.mean(), mean(&data), 0.05); 110 | assert_approx_eq!(dist.var(), var(&data), 0.05); 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /src/distributions/poisson.rs: -------------------------------------------------------------------------------- 1 | use crate::distributions::*; 2 | use crate::functions::gamma; 3 | 4 | /// Implements the [Poisson](https://en.wikipedia.org/wiki/https://en.wikipedia.org/wiki/Poisson_distribution) 5 | /// distribution. 6 | #[derive(Debug, Clone, Copy)] 7 | pub struct Poisson { 8 | /// Rate parameter for the Poisson distribution. 9 | lambda: f64, 10 | } 11 | 12 | impl Poisson { 13 | /// Create a new Poisson distribution with rate parameter `lambda`. 14 | /// 15 | /// # Errors 16 | /// Panics if `lambda <= 0.0`. 
pub fn new(lambda: f64) -> Self {
        if lambda <= 0. {
            panic!("`Lambda` must be positive.");
        }
        Poisson { lambda }
    }

    /// Set the rate parameter. Panics if `lambda <= 0.0`.
    pub fn set_lambda(&mut self, lambda: f64) -> &mut Self {
        if lambda <= 0. {
            panic!("`Lambda` must be positive.")
        }
        self.lambda = lambda;
        self
    }
}

impl Default for Poisson {
    /// The default Poisson distribution has rate 1.
    fn default() -> Self {
        Self::new(1.)
    }
}

impl Distribution for Poisson {
    type Output = f64;
    /// Samples from the given Poisson distribution. For `lambda < 10.0`, this is done with the direct (multiplication) method,
    /// and for `lambda >= 10.0`, this is done the PTRS transformed rejection method from [Hoermann](https://doi.org/10.1016/0167-6687(93)90997-4).
    fn sample(&self) -> f64 {
        if self.lambda < 10. {
            sample_mult(self.lambda)
        } else {
            sample_ptrs(self.lambda)
        }
    }
}

impl Distribution1D for Poisson {
    fn update(&mut self, params: &[f64]) {
        self.set_lambda(params[0]);
    }
}

impl Discrete for Poisson {
    /// Calculates the [probability mass
    /// function](https://en.wikipedia.org/wiki/Probability_mass_function) for the given Poisson
    /// distribution at `k`.
    ///
    /// # Remarks
    /// Returns `0.` for negative `k`.
    fn pmf(&self, k: i64) -> f64 {
        if k < 0 {
            0.
        } else {
            // pmf(k) = λ^k e^{-λ} / k!, and k! = Γ(k + 1). The previous code divided by
            // Γ(k) = (k - 1)!, which gave pmf(0) = 0 and mis-weighted every other k.
            self.lambda.powi(k as i32) * (-self.lambda).exp() / gamma(k as f64 + 1.)
        }
    }
}

impl Mean for Poisson {
    type MeanType = f64;
    /// Calculates the mean, which is given by the rate parameter.
    fn mean(&self) -> f64 {
        self.lambda
    }
}

impl Variance for Poisson {
    type VarianceType = f64;
    /// Calculates the variance, which is given by the rate parameter.
82 | fn var(&self) -> f64 { 83 | self.lambda 84 | } 85 | } 86 | 87 | fn sample_mult(lambda: f64) -> f64 { 88 | let limit: f64 = (-lambda).exp(); 89 | let mut count = 0.; 90 | let mut product: f64 = alea::f64(); 91 | while product > limit { 92 | count += 1.; 93 | product *= alea::f64(); 94 | } 95 | count 96 | } 97 | 98 | #[allow(non_snake_case)] 99 | fn sample_ptrs(lam: f64) -> f64 { 100 | let slam = lam.sqrt(); 101 | let loglam = lam.ln(); 102 | let b = 0.931 + 2.53 * slam; 103 | let a = -0.059 + 0.02483 * b; 104 | let invalpha = 1.1239 + 1.1328 / (b - 3.4); 105 | let vr = 0.9277 - 3.6224 / (b - 2.); 106 | 107 | loop { 108 | let U = alea::f64() - 0.5; 109 | let V = alea::f64(); 110 | let us = 0.5 - U.abs(); 111 | let k = f64::floor((2. * a / us + b) * U + lam + 0.43); 112 | if (us >= 0.07) && (V <= vr) { 113 | return k; 114 | } 115 | if (k < 0.) || (us < 0.013) && (V > us) { 116 | continue; 117 | } 118 | if (V.ln() + invalpha.ln() - (a / (us * us) + b).ln()) 119 | <= (-lam + k * loglam - gamma(k + 1.).ln()) 120 | { 121 | return k; 122 | } 123 | } 124 | } 125 | 126 | #[cfg(test)] 127 | mod tests { 128 | use super::*; 129 | use crate::statistics::{mean, var}; 130 | use approx_eq::assert_approx_eq; 131 | 132 | #[test] 133 | fn test_moments() { 134 | let data5 = self::Poisson::new(5.).sample_n(1e6 as usize); 135 | let mean5 = mean(&data5); 136 | let var5 = var(&data5); 137 | assert_approx_eq!(mean5, 5., 1e-2); 138 | assert_approx_eq!(var5, 5., 1e-2); 139 | 140 | let data42 = self::Poisson::new(42.).sample_n(1e6 as usize); 141 | let mean42 = mean(&data42); 142 | let var42 = var(&data42); 143 | assert_approx_eq!(mean42, 42., 1e-2); 144 | assert_approx_eq!(var42, 42., 1e-2); 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /src/distributions/t.rs: -------------------------------------------------------------------------------- 1 | use crate::distributions::*; 2 | use crate::functions::gamma; 3 | 4 | /// Implements the 
[Student's T](https://en.wikipedia.org/wiki/Student%27s_t-distribution) distribution.
#[derive(Debug, Clone, Copy)]
pub struct T {
    /// Degrees of freedom
    dof: f64,
}

pub type StudentsT = T;

impl T {
    /// Create a new t distribution with `dof` degrees of freedom.
    ///
    /// # Errors
    /// Panics if degrees of freedom is not positive.
    pub fn new(dof: f64) -> Self {
        assert!(dof > 0., "Degrees of freedom must be positive.");
        T { dof }
    }
    /// Set the degrees of freedom. Panics if not positive.
    pub fn set_dof(&mut self, dof: f64) -> &mut Self {
        assert!(dof > 0., "Degrees of freedom must be positive.");
        self.dof = dof;
        self
    }
}

impl Default for T {
    /// The default is one degree of freedom (the Cauchy distribution).
    fn default() -> Self {
        Self::new(1.)
    }
}

impl Distribution for T {
    type Output = f64;
    /// Samples from the given T distribution.
    fn sample(&self) -> f64 {
        // T_ν = Z / sqrt(χ²_ν / ν), with χ²_ν sampled as 2·Gamma(ν/2, 1),
        // which rearranges to sqrt(ν/2)·Z / sqrt(Gamma(ν/2, 1)).
        (self.dof / 2.).sqrt() * Normal::default().sample()
            / Gamma::new(self.dof / 2., 1.).sample().sqrt()
    }
}

impl Distribution1D for T {
    fn update(&mut self, params: &[f64]) {
        self.set_dof(params[0]);
    }
}

impl Continuous for T {
    type PDFType = f64;
    /// Calculates the probability density function for the given T distribution at `x`.
    fn pdf(&self, x: f64) -> f64 {
        // pdf(x) = Γ((ν+1)/2) / (√(νπ)·Γ(ν/2)) · (1 + x²/ν)^(−(ν+1)/2).
        // The exponent was previously -(ν - 1)/2, which is not a normalized density
        // (e.g. for ν = 1 it made the shape factor constant instead of Cauchy).
        gamma((self.dof + 1.) / 2.)
            / ((self.dof * std::f64::consts::PI).sqrt() * gamma(self.dof / 2.))
            * (1. + x.powi(2) / self.dof).powf(-(self.dof + 1.) / 2.)
    }
}

impl Mean for T {
    type MeanType = f64;
    /// Calculates the mean of the T distribution, which is 0 when the degrees of freedom is
    /// greater than 1, and undefined otherwise.
    fn mean(&self) -> f64 {
        if self.dof > 1. {
            0.
        } else {
            f64::NAN
        }
    }
}

impl Variance for T {
    type VarianceType = f64;
    /// Calculates the variance of the T distribution.
77 | /// 78 | /// # Remarks 79 | /// This is not defined when degrees of freedom is less than or equal to 1, and infinity when 80 | /// degrees of freedom is in (1, 2]. 81 | fn var(&self) -> f64 { 82 | if self.dof > 2. { 83 | self.dof / (self.dof - 2.) 84 | } else if (1. < self.dof) & (self.dof <= 2.) { 85 | f64::INFINITY 86 | } else { 87 | f64::NAN 88 | } 89 | } 90 | } 91 | 92 | #[cfg(test)] 93 | mod tests { 94 | 95 | use super::*; 96 | use crate::statistics::mean; 97 | use approx_eq::assert_approx_eq; 98 | 99 | #[test] 100 | fn test_moments() { 101 | let t = T::new(2.); 102 | let data = t.sample_n(1e6 as usize); 103 | assert_approx_eq!(mean(&data), 0., 1e-2); 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /src/distributions/uniform.rs: -------------------------------------------------------------------------------- 1 | use crate::distributions::*; 2 | 3 | /// Implements the [Uniform](https://en.wikipedia.org/wiki/Uniform_distribution_(continuous)) 4 | /// distribution. 5 | #[derive(Debug, Clone, Copy)] 6 | pub struct Uniform { 7 | /// Lower bound for the Uniform distribution. 8 | lower: f64, 9 | /// Upper bound for the Uniform distribution. 10 | upper: f64, 11 | } 12 | 13 | impl Uniform { 14 | /// Create a new Uniform distribution with lower bound `lower` and upper bound `upper`. 15 | /// 16 | /// # Errors 17 | /// Panics if `lower > upper`. 
pub fn new(lower: f64, upper: f64) -> Self {
        if lower > upper {
            panic!("`Upper` must be larger than `lower`.");
        }
        Uniform { lower, upper }
    }

    /// Move the lower bound, keeping the upper bound fixed.
    pub fn set_lower(&mut self, lower: f64) -> &mut Self {
        if lower > self.upper {
            panic!("Upper must be larger than lower.")
        }
        self.lower = lower;
        self
    }

    /// Move the upper bound, keeping the lower bound fixed.
    pub fn set_upper(&mut self, upper: f64) -> &mut Self {
        if self.lower > upper {
            panic!("Upper must be larger than lower.")
        }
        self.upper = upper;
        self
    }
}

impl Default for Uniform {
    /// The standard Uniform(0, 1) distribution.
    fn default() -> Self {
        Self::new(0., 1.)
    }
}

impl Distribution for Uniform {
    type Output = f64;
    /// Draws one value by rescaling a uniform draw on [0, 1) onto the interval.
    fn sample(&self) -> f64 {
        let width = self.upper - self.lower;
        width * alea::f64() + self.lower
    }
}

impl Distribution1D for Uniform {
    fn update(&mut self, params: &[f64]) {
        self.set_lower(params[0]).set_upper(params[1]);
    }
}

impl Continuous for Uniform {
    type PDFType = f64;
    /// Calculates the [probability density
    /// function](https://en.wikipedia.org/wiki/Probability_density_function) for the given Uniform
    /// distribution at `x`.
    ///
    /// # Remarks
    ///
    /// Returns `0.` if `x` is not in `[lower, upper]`
    fn pdf(&self, x: f64) -> f64 {
        let outside = x < self.lower || x > self.upper;
        if outside {
            return 0.;
        }
        (self.upper - self.lower).recip()
    }
}

impl Mean for Uniform {
    type MeanType = f64;
    /// Calculates the mean, which for a Uniform(a, b) distribution is given by `(a + b) / 2`.
    fn mean(&self) -> f64 {
        0.5 * (self.lower + self.upper)
    }
}

impl Variance for Uniform {
    type VarianceType = f64;
    /// Calculates the variance of the given Uniform distribution, `(b - a)^2 / 12`.
    fn var(&self) -> f64 {
        let width = self.upper - self.lower;
        width.powi(2) / 12.
91 | } 92 | } 93 | 94 | #[test] 95 | fn inrange() { 96 | let u = self::Uniform::new(-2., 6.); 97 | let samples = u.sample_n(100); 98 | samples.into_iter().for_each(|x| { 99 | assert!(-2. <= x); 100 | assert!(x <= 6.); 101 | }) 102 | } 103 | -------------------------------------------------------------------------------- /src/functions/combinatorial.rs: -------------------------------------------------------------------------------- 1 | //! Combinatorial functions. 2 | 3 | use crate::functions::gamma; 4 | /// Calculates the [binomial coefficient](https://en.wikipedia.org/wiki/Binomial_coefficient) 5 | /// nCk for two integers `n` and `k`, with `n >= k`. 6 | /// 7 | pub fn binom_coeff(n: u64, k: u64) -> u64 { 8 | let mut nk = k; 9 | if k > n - k { 10 | nk = n - k; 11 | } 12 | 13 | let mut c = 1; 14 | for i in 1..=nk { 15 | if c / i > std::u64::MAX / nk { 16 | return 0; 17 | } 18 | c = c / i * (n - i + 1) + c % i * (n - i + 1) / i; 19 | } 20 | c 21 | } 22 | 23 | /// An alternative method for computing binomial coefficients. There is no significant difference 24 | /// between the compute time using the `binom_coeff` method and this method. This method becomes 25 | /// slightly inaccurate (by 1 or 2) starting at `n ~ 50`. 
26 | pub fn binom_coeff_alt(n: u64, k: u64) -> u64 { 27 | (gamma(n as f64 + 1.).ln() - gamma(k as f64 + 1.).ln() - gamma((n - k) as f64 + 1.).ln()) 28 | .exp() 29 | .round() as u64 30 | } 31 | 32 | #[cfg(test)] 33 | mod tests { 34 | use super::*; 35 | use crate::distributions::{DiscreteUniform, Distribution, Distribution1D}; 36 | 37 | #[test] 38 | fn test_binom_methods() { 39 | let n: Vec = DiscreteUniform::new(5, 45) 40 | .sample_n(1000) 41 | .iter() 42 | .map(|x| *x as u64) 43 | .collect(); 44 | let k: Vec = n 45 | .iter() 46 | .map(|x| DiscreteUniform::new(0, *x as i64).sample() as u64) 47 | .collect(); 48 | for i in 0..1000 { 49 | assert_eq!(binom_coeff(n[i], k[i]), binom_coeff_alt(n[i], k[i])); 50 | } 51 | } 52 | 53 | #[test] 54 | fn test_binom_pascal() { 55 | let n: Vec = DiscreteUniform::new(5, 50) 56 | .sample_n(1000) 57 | .iter() 58 | .map(|x| *x as u64) 59 | .collect(); 60 | let k: Vec = n 61 | .iter() 62 | .map(|x| DiscreteUniform::new(0, (*x - 1) as i64).sample() as u64) 63 | .collect(); 64 | for i in 0..1000 { 65 | assert_eq!( 66 | binom_coeff(n[i], k[i]) + binom_coeff(n[i], k[i] + 1), 67 | binom_coeff(n[i] + 1, k[i] + 1) 68 | ); 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/functions/gamma.rs: -------------------------------------------------------------------------------- 1 | //! Gamma and related functions. 2 | 3 | #![allow(clippy::excessive_precision)] 4 | 5 | use std::f64::consts::PI; 6 | 7 | const G: f64 = 4.7421875 + 1.; 8 | 9 | /// Coefficients from [here](https://my.fit.edu/~gabdo/gamma.txt). 
10 | const GAMMA_COEFFS: [f64; 14] = [ 11 | 57.156235665862923517, 12 | -59.597960355475491248, 13 | 14.136097974741747174, 14 | -0.49191381609762019978, 15 | 0.33994649984811888699e-4, 16 | 0.46523628927048575665e-4, 17 | -0.98374475304879564677e-4, 18 | 0.15808870322491248884e-3, 19 | -0.21026444172410488319e-3, 20 | 0.21743961811521264320e-3, 21 | -0.16431810653676389022e-3, 22 | 0.84418223983852743293e-4, 23 | -0.26190838401581408670e-4, 24 | 0.36899182659531622704e-5, 25 | ]; 26 | 27 | /// Calculates the [Gamma function](https://en.wikipedia.org/wiki/Gamma_function) using the [Lanczos 28 | /// approximation](https://en.wikipedia.org/wiki/Lanczos_approximation). It obeys the equation 29 | /// `gamma(x+1) = gamma(x) * x`. This approximation uses the reflection formula to extend the 30 | /// calculation to the entire complex plane. 31 | pub fn gamma(z: f64) -> f64 { 32 | if z < 0.5 { 33 | PI / ((PI * z).sin() * gamma(1. - z)) 34 | } else { 35 | let mut x = 0.99999999999999709182; 36 | for (idx, val) in GAMMA_COEFFS.iter().enumerate() { 37 | x += val / ((z - 1.) + (idx as f64) + 1.); 38 | } 39 | let t = (z - 1.) + G - 0.5; 40 | ((2. * PI) as f64).sqrt() * t.powf((z - 1.) + 0.5) * (-t).exp() * x 41 | } 42 | } 43 | 44 | /// Calculates the [beta function](https://en.wikipedia.org/wiki/Beta_function) using the 45 | /// relationship between the beta function and the gamma function. 46 | pub fn beta(a: f64, b: f64) -> f64 { 47 | gamma(a) * gamma(b) / gamma(a + b) 48 | } 49 | 50 | /// Calculates the [digamma function](https://en.wikipedia.org/wiki/Digamma_function), which is the 51 | /// logarithmic derivative of the gamma function. It obeys the equation `digamma(x+1) = digamma(x) 52 | /// + 1/x`. The approximation works better for large values. If the value is small, this function 53 | /// will shift it up using the digamma recurrence relation. 54 | pub fn digamma(x: f64) -> f64 { 55 | if x < 6. { 56 | digamma(x + 1.) - 1. / x 57 | } else { 58 | x.ln() - 1. / (2. 
* x) - 1. / (12. * x.powi(2)) + 1. / (120. * x.powi(4)) 59 | - 1. / (252. * x.powi(6)) 60 | + 1. / (240. * x.powi(8)) 61 | - 5. / (660. * x.powi(10)) 62 | + 691. / (32760. * x.powi(12)) 63 | - 1. / (12. * x.powi(14)) 64 | } 65 | } 66 | 67 | #[cfg(test)] 68 | mod tests { 69 | use super::*; 70 | use approx_eq::assert_approx_eq; 71 | 72 | #[test] 73 | fn test_gamma() { 74 | assert_approx_eq!(gamma(0.1), 9.513507698668731836292487); 75 | assert_approx_eq!(gamma(0.5), 1.7724538509551602798167); 76 | assert_approx_eq!(gamma(6.), 120.); 77 | assert_approx_eq!(gamma(20.), 121645100408832000.); 78 | assert_approx_eq!(gamma(-0.5), -3.54490770181103205459); 79 | } 80 | 81 | #[test] 82 | fn test_beta() { 83 | assert_approx_eq!(beta(1., 3.12345), 1. / 3.12345); 84 | assert_approx_eq!(beta(2.1313, 1. - 2.1313), PI / (PI * 2.1313).sin()); 85 | assert_approx_eq!( 86 | beta(7.2, 0.23) * beta(7.2 + 0.23, 1. - 0.23), 87 | PI / (7.2 * (PI * 0.23).sin()) 88 | ); 89 | } 90 | 91 | #[test] 92 | fn test_digamma() { 93 | assert_approx_eq!(digamma(21. + 1.), digamma(21.) + 1. / 21.); 94 | assert_approx_eq!(digamma(2. + 1.), digamma(2.) + 1. / 2.); 95 | assert_approx_eq!(digamma(0.5), -1.96351002602142347944097633); 96 | assert_approx_eq!(digamma(-0.5), 0.036489973978576520559023667); 97 | assert_approx_eq!(digamma(1.), -0.57721566490153286060651209); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/functions/interpolate.rs: -------------------------------------------------------------------------------- 1 | use crate::prelude::Vector; 2 | 3 | pub enum ExtrapolationMode { 4 | Panic, 5 | Fill(f64, f64), 6 | Extrapolate, 7 | } 8 | 9 | pub fn interp1d_linear( 10 | x: &[f64], 11 | y: &[f64], 12 | tgt: &[f64], 13 | extrapolate: ExtrapolationMode, 14 | ) -> Vector { 15 | // Performs linear interpolation on an array of values, given some (x, y) pairs. 16 | // Assumes that x is sorted (in ascending order). 
17 | // 18 | // Inputs: 19 | // x --- 1d array of x values 20 | // y --- 1d array of y values, which must be the same size as x 21 | // tgt - x values to apply interpolation to 22 | // extrapolate -- bool indicating whether to extrapolate for points outside the range of x 23 | // 24 | // Outputs: 25 | // interp - interpolated (extrapolated) y values corresponding to the input tgt values 26 | 27 | assert_eq!(x.len(), y.len(), "x and y must have the same size"); 28 | 29 | let n = x.len(); 30 | 31 | // for now, x must be in ascending order (TODO: relax this when there is argsort implementation) 32 | for i in 0..n - 1 { 33 | if x[i + 1] - x[i] < 0. { 34 | panic!("x must be sorted in ascending order"); 35 | } 36 | } 37 | 38 | interp1d_linear_unchecked(x, y, tgt, extrapolate) 39 | } 40 | 41 | #[inline(always)] 42 | pub fn interp1d_linear_unchecked( 43 | x: &[f64], 44 | y: &[f64], 45 | tgt: &[f64], 46 | extrapolate: ExtrapolationMode, 47 | ) -> Vector { 48 | // Performs linear interpolation on an array of values, given some (x, y) pairs. 49 | // Assumes, and does not check, that x is sorted (in ascending order). 
50 | // 51 | // Inputs: 52 | // x --- 1d array of x values 53 | // y --- 1d array of y values, which must be the same size as x 54 | // tgt - x values to apply interpolation to 55 | // extrapolate -- bool indicating whether to extrapolate for points outside the range of x 56 | // 57 | // Outputs: 58 | // interp - interpolated (extrapolated) y values corresponding to the input tgt values 59 | 60 | assert_eq!(x.len(), y.len(), "x and y must have the same size"); 61 | 62 | let n = x.len(); 63 | 64 | let k = tgt.len(); 65 | 66 | // // interpolated values 67 | let mut interp = Vector::with_capacity(k); 68 | 69 | for i in 0..k { 70 | // find the closest supplied x (lower and upper) 71 | let mut idx = 0; 72 | for j in 0..n - 1 { 73 | if x[j] > tgt[i] { 74 | break; 75 | } 76 | idx += 1; 77 | } 78 | 79 | // out of bounds, optionally extrapolate 80 | if idx == 0 || idx > n { 81 | match extrapolate { 82 | ExtrapolationMode::Panic => panic!( 83 | "Target out of bounds, need to extrapolate, but extrapolation mode is panic!" 84 | ), 85 | ExtrapolationMode::Fill(left, right) => { 86 | if idx == 0 { 87 | interp.push(left); 88 | } else if idx > n { 89 | interp.push(right); 90 | } 91 | } 92 | ExtrapolationMode::Extrapolate => { 93 | // extrapolate left 94 | if idx == 0 { 95 | /* print("extrapolating left ", tgt[i]); */ 96 | let slope = (y[1] - y[0]) / (x[1] - x[0]); 97 | interp.push(-slope * (x[0] - tgt[i]) + y[0]); 98 | } 99 | // extrapolate right 100 | else if idx > n { 101 | /* print("extrapolating right ", tgt[i]); */ 102 | let slope = (y[n] - y[n - 1]) / (x[n] - x[n - 1]); 103 | interp.push(slope * (tgt[i] - x[n]) + y[n]); 104 | } 105 | } 106 | } 107 | } 108 | // within bounds, do normal interpolation 109 | else { 110 | // how close is target to the closest lower x and upper x? 111 | let ratio = (tgt[i] - x[idx - 1]) / (x[idx] - x[idx - 1]); 112 | 113 | // interpolate y value based on ratio 114 | interp.push(ratio * y[idx] + (1. 
- ratio) * y[idx - 1]); 115 | } 116 | } 117 | 118 | return interp; 119 | } 120 | 121 | #[cfg(test)] 122 | mod test { 123 | use crate::prelude::arange; 124 | 125 | use super::*; 126 | 127 | #[test] 128 | fn test_interp1d_linear_1() { 129 | let x = Vector::from([0., 0.5, 1., 1.5, 2., 2.5, 3.]); 130 | let y = x.exp().cos() + x.sin(); 131 | let xnew = arange(-2., 6., 0.2); 132 | let ynew = interp1d_linear(&x, &y, &xnew, ExtrapolationMode::Extrapolate); 133 | 134 | let ynew_true = Vector::from([ 135 | 1.09519, 1.0397, 0.984215, 0.928726, 0.873237, 0.817748, 0.762259, 0.70677, 0.651281, 136 | 0.595791, 0.540302, 0.484813, 0.429324, 0.307211, 0.118474, -0.0702629, 0.265377, 137 | 0.601016, 0.8866, 1.12213, 1.35765, 1.42487, 1.49208, 1.3145, 0.892106, 0.469715, 138 | 0.0473239, -0.375067, -0.797458, -1.21985, -1.64224, -2.06463, -2.48702, -2.90941, 139 | -3.3318, -3.75419, -4.17658, -4.59898, -5.02137, -5.44376, 140 | ]); 141 | 142 | assert!(ynew.close_to(&ynew_true, 1e-3)); 143 | } 144 | 145 | #[test] 146 | fn test_interp1d_linear_2() { 147 | let x = arange(-1., 4., 0.2); 148 | let y = (-x.exp()).cos(); 149 | let xnew = arange(-2., 6., 0.2); 150 | let ynew = interp1d_linear(&x, &y, &xnew, ExtrapolationMode::Extrapolate); 151 | 152 | let ynew_true = Vector::from([ 153 | 1.09485857, 154 | 1.06250527, 155 | 1.03015197, 156 | 0.99779867, 157 | 0.96544537, 158 | 0.93309208, 159 | 0.90073878, 160 | 0.85314506, 161 | 0.78362288, 162 | 0.68314866, 163 | 0.54030231, 164 | 0.34232808, 165 | 0.07888957, 166 | -0.2486851, 167 | -0.60895668, 168 | -0.91173391, 169 | -0.98410682, 170 | -0.61089378, 171 | 0.23832758, 172 | 0.97285375, 173 | 0.44835624, 174 | -0.92115269, 175 | 0.02759859, 176 | 0.62366998, 177 | -0.74070077, 178 | 0.32859476, 179 | 0.82521644, 180 | 0.11868939, 181 | 0.452814, 182 | 0.75253893, 183 | 1.05226386, 184 | 1.35198879, 185 | 1.65171372, 186 | 1.95143865, 187 | 2.25116358, 188 | 2.55088852, 189 | 2.85061345, 190 | 3.15033838, 191 | 3.45006331, 192 | 
3.74978824, 193 | ]); 194 | 195 | assert!(ynew.close_to(&ynew_true, 1e-3)); 196 | } 197 | } 198 | -------------------------------------------------------------------------------- /src/functions/mod.rs: -------------------------------------------------------------------------------- 1 | //! Various mathematical and statistical functions. 2 | 3 | mod combinatorial; 4 | mod gamma; 5 | mod interpolate; 6 | mod statistical; 7 | 8 | pub use self::combinatorial::*; 9 | pub use self::gamma::*; 10 | pub use self::interpolate::*; 11 | pub use self::statistical::*; 12 | -------------------------------------------------------------------------------- /src/functions/statistical.rs: -------------------------------------------------------------------------------- 1 | //! Various mathematical functions commonly used in statistics. 2 | 3 | /// Calculates the standard [logistic function](https://en.wikipedia.org/wiki/Logistic_function) 4 | pub fn logistic(x: f64) -> f64 { 5 | 1. / (1. + (-x).exp()) 6 | } 7 | 8 | /// Calculates the [logit function](https://en.wikipedia.org/wiki/Logit) 9 | pub fn logit(p: f64) -> f64 { 10 | if !(0. ..=1.).contains(&p) { 11 | panic!("p must be in [0, 1]"); 12 | } 13 | (p / (1. - p)).ln() 14 | } 15 | 16 | /// Calculates the one-parameter Box-Cox transformation with some power parameter `lambda`. 17 | pub fn boxcox(x: f64, lambda: f64) -> f64 { 18 | assert!(x > 0., "x must be positive"); 19 | if lambda == 0. { 20 | x.ln() 21 | } else { 22 | (x.powf(lambda) - 1.) / lambda 23 | } 24 | } 25 | 26 | /// Calculates the two-parameter Box-Cox transformation with some power parameter `lambda` and some 27 | /// shift parameter `alpha`. 28 | pub fn boxcox_shifted(x: f64, lambda: f64, alpha: f64) -> f64 { 29 | assert!(x > alpha, "x must larger than alpha"); 30 | if lambda == 0. { 31 | (x + alpha).ln() 32 | } else { 33 | ((x + alpha).powf(lambda) - 1.) 
/ lambda 34 | } 35 | } 36 | 37 | /// Calculates the softmax (the normalized exponential) function, which is a generalization of the 38 | /// logistic function to multiple dimensions. 39 | /// 40 | /// Takes in a vector of real numbers and normalizes it to a probability distribution such that 41 | /// each of the components are in the interval (0, 1) and the components add up to 1. Larger input 42 | /// components correspond to larger probabilities. 43 | pub fn softmax(x: &[f64]) -> Vec { 44 | let sum_exp: f64 = x.iter().map(|i| i.exp()).sum(); 45 | x.iter().map(|i| i.exp() / sum_exp).collect() 46 | } 47 | 48 | const ERF_P: f64 = 0.3275911; 49 | const ERF_A1: f64 = 0.254829592; 50 | const ERF_A2: f64 = -0.284496736; 51 | const ERF_A3: f64 = 1.421413741; 52 | const ERF_A4: f64 = -1.453152027; 53 | const ERF_A5: f64 = 1.061405429; 54 | 55 | /// Calculates the [error function](https://en.wikipedia.org/wiki/Error_function) erf(x). 56 | /// 57 | /// # Remarks 58 | /// Uses Equation 7.1.26 in Stegun in combination with Horner's Rule. 59 | pub fn erf(x: f64) -> f64 { 60 | if x >= 0. { 61 | let t = 1. / (1. + ERF_P * x); 62 | 1. 
- (((((ERF_A5 * t + ERF_A4) * t) + ERF_A3) * t + ERF_A2) * t + ERF_A1) 63 | * t 64 | * (-x * x).exp() 65 | } else { 66 | // erf is an odd function 67 | -erf(-x) 68 | } 69 | } 70 | 71 | #[cfg(test)] 72 | mod tests { 73 | use super::*; 74 | use crate::distributions::{Distribution1D, Exponential, Uniform}; 75 | use approx_eq::assert_approx_eq; 76 | 77 | #[test] 78 | fn test_logistic() { 79 | let d = Exponential::new(5.).sample_n(100 as usize); 80 | d.iter().for_each(|x| { 81 | assert_approx_eq!(logistic(*x) + logistic(-*x), 1.); 82 | }); 83 | for i in 0..d.len() { 84 | for j in i..d.len() { 85 | if d[i] >= d[j] { 86 | assert!(logistic(d[i]) >= logistic(d[j])); 87 | } 88 | } 89 | } 90 | assert_eq!(logistic(f64::NEG_INFINITY), 0.); 91 | assert_eq!(logistic(0.), 0.5); 92 | assert_eq!(logistic(f64::INFINITY), 1.); 93 | } 94 | 95 | #[test] 96 | fn test_logit() { 97 | let d = Uniform::new(0., 1.).sample_n(100 as usize); 98 | d.iter().for_each(|x| { 99 | assert_approx_eq!(*x, logistic(logit(*x))); 100 | assert_approx_eq!(*x, logit(logistic(*x))); 101 | }); 102 | for i in 0..d.len() { 103 | for j in (i + 1)..d.len() { 104 | assert_approx_eq!( 105 | logit(d[i]) - logit(d[j]), 106 | ((d[i] / (1. - d[i])) / (d[j] / (1. 
- d[j]))).ln() 107 | ); 108 | } 109 | } 110 | assert_eq!(logit(0.), f64::NEG_INFINITY); 111 | assert_eq!(logit(0.5), 0.); 112 | assert_eq!(logit(1.), f64::INFINITY); 113 | } 114 | 115 | #[test] 116 | fn test_softmax() { 117 | let orig = vec![1., 2., 3., 4., 1., 2., 3.]; 118 | let tfm = vec![ 119 | 0.02364054, 0.06426166, 0.1746813, 0.474833, 0.02364054, 0.06426166, 0.1746813, 120 | ]; 121 | let smv = softmax(&orig); 122 | for i in 0..smv.len() { 123 | assert_approx_eq!(smv[i], tfm[i]); 124 | } 125 | assert_approx_eq!(smv.iter().sum(), 1.); 126 | } 127 | 128 | #[test] 129 | fn test_erf() { 130 | assert_approx_eq!(erf(0.), 0., 1e-5); 131 | assert_approx_eq!(erf(0.02), 0.022564575, 1e-5); 132 | assert_approx_eq!(erf(0.04), 0.045111106, 1e-5); 133 | assert_approx_eq!(erf(0.06), 0.067621594, 1e-5); 134 | assert_approx_eq!(erf(0.08), 0.090078126, 1e-5); 135 | assert_approx_eq!(erf(0.1), 0.112462916, 1e-5); 136 | assert_approx_eq!(erf(0.2), 0.222702589, 1e-5); 137 | assert_approx_eq!(erf(0.3), 0.328626759, 1e-5); 138 | assert_approx_eq!(erf(0.4), 0.428392355, 1e-5); 139 | assert_approx_eq!(erf(0.5), 0.520499878, 1e-5); 140 | assert_approx_eq!(erf(0.6), 0.603856091, 1e-5); 141 | assert_approx_eq!(erf(0.7), 0.677801194, 1e-5); 142 | assert_approx_eq!(erf(0.8), 0.742100965, 1e-5); 143 | assert_approx_eq!(erf(0.9), 0.796908212, 1e-5); 144 | assert_approx_eq!(erf(1.), 0.842700793, 1e-5); 145 | assert_approx_eq!(erf(1.1), 0.88020507, 1e-5); 146 | assert_approx_eq!(erf(1.2), 0.910313978, 1e-5); 147 | assert_approx_eq!(erf(1.3), 0.934007945, 1e-5); 148 | assert_approx_eq!(erf(1.4), 0.95228512, 1e-5); 149 | assert_approx_eq!(erf(1.5), 0.966105146, 1e-5); 150 | assert_approx_eq!(erf(1.6), 0.976348383, 1e-5); 151 | assert_approx_eq!(erf(1.7), 0.983790459, 1e-5); 152 | assert_approx_eq!(erf(1.8), 0.989090502, 1e-5); 153 | assert_approx_eq!(erf(1.9), 0.992790429, 1e-5); 154 | assert_approx_eq!(erf(2.), 0.995322265, 1e-5); 155 | assert_approx_eq!(erf(2.1), 0.997020533, 1e-5); 156 | 
assert_approx_eq!(erf(2.2), 0.998137154, 1e-5); 157 | assert_approx_eq!(erf(2.3), 0.998856823, 1e-5); 158 | assert_approx_eq!(erf(2.4), 0.999311486, 1e-5); 159 | assert_approx_eq!(erf(2.5), 0.999593048, 1e-5); 160 | assert_approx_eq!(erf(3.), 0.99997791, 1e-5); 161 | assert_approx_eq!(erf(3.5), 0.999999257, 1e-5); 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/integrate/functions.rs: -------------------------------------------------------------------------------- 1 | //! Algorithms for computing integrals of 1D functions. 2 | 3 | #![allow(unused_variables)] 4 | use crate::linalg::Matrix; 5 | use approx_eq::rel_diff; 6 | 7 | /// Integrate a function `f` from `a` to `b` using the [trapezoid rule](https://en.wikipedia.org/wiki/Trapezoidal_rule) with `n` partitions. 8 | pub fn trapz(f: F, a: f64, b: f64, n: usize) -> f64 9 | where 10 | F: Fn(f64) -> f64, 11 | { 12 | let dx: f64 = (b - a) / (n as f64); 13 | dx * ((0..n).map(|k| f(a + k as f64 * dx)).sum::() + (f(b) + f(a)) / 2.) 14 | } 15 | 16 | /// Integrate a function `f` from `a` to `b` using the [Romberg method](https://en.wikipedia.org/wiki/Romberg%27s_method), 17 | /// stopping after either sequential estimates are less than `eps` or `n` steps have been taken. 18 | pub fn romberg(f: F, a: f64, b: f64, eps: f64, nmax: usize) -> f64 19 | where 20 | F: Fn(f64) -> f64, 21 | { 22 | // let mut r: Vec> = vec![vec![0.; nmax]; nmax]; 23 | let mut r = Matrix::zeros(nmax, nmax); 24 | 25 | r[[0, 0]] = (b - a) / 2. 
* (f(a) + f(b)); 26 | 27 | for n in 1..nmax { 28 | let hn = (b - a) / 2_f64.powi(n as i32); 29 | let s: f64 = (1..=2_u32.pow((n - 1) as u32)) 30 | .map(|k| f(a + (2 * k - 1) as f64 * hn)) 31 | .sum(); 32 | r[[n, 0]] = 0.5 * r[n - 1][0] + hn * s; 33 | } 34 | 35 | for n in 1..nmax { 36 | for m in 1..=n { 37 | r[[n, m]] = 38 | r[[n, m - 1]] + (r[[n, m - 1]] - r[[n - 1, m - 1]]) / (4_f64.powi(m as i32) - 1.); 39 | } 40 | if n > 1 41 | && (rel_diff(r[[n, n]], r[[n - 1, n - 1]]) < eps 42 | || (r[[n, n]] - r[[n - 1, n - 1]]).abs() < eps) 43 | { 44 | return r[[n, n]]; 45 | } 46 | } 47 | 48 | r[[nmax - 1, nmax - 1]] 49 | } 50 | 51 | ///// Given upper and lower limits of integration, this function calculates the nodes `x` and weights 52 | ///// `w` for the n-point Gauss-Legendre quadrature. 53 | //fn gau_leg_weights(a: f64, b: f64, n: u32) -> (Vec, Vec) { 54 | // unimplemented!(); 55 | //} 56 | 57 | ///// 58 | ///// Given upper and lower limits of integration, this function calculates the nodes `x` and weights 59 | ///// `w` for the n-point Gauss-Laguerre quadrature. 60 | //fn gau_lag_weights(a: f64, b: f64, n: u32) -> (Vec, Vec) { 61 | // unimplemented!(); 62 | //} 63 | 64 | ///// Given upper and lower limits of integration, this function calculates the nodes `x` and weights 65 | ///// `w` for the n-point Gauss-Jacobi quadrature. 66 | //fn gau_jac_weights(a: f64, b: f64, n: u32) -> (Vec, Vec) { 67 | // unimplemented!(); 68 | //} 69 | 70 | ///// Given upper and lower limits of integration, this function calculates the nodes `x` and weights 71 | ///// `w` for the n-point Gauss-Hermite quadrature. 72 | //fn gau_her_weights(a: f64, b: f64, n: u32) -> (Vec, Vec) { 73 | // unimplemented!(); 74 | //} 75 | 76 | /// Integrate a function `f` from `a` to `b` using the [Gauss-Legendre quadrature 77 | /// method](https://en.wikipedia.org/wiki/Gaussian_quadrature) with 5 points (allows for 78 | /// exact integration of polynomials up to degree 9). 
79 | pub fn quad5(f: F, a: f64, b: f64) -> f64 80 | where 81 | F: Fn(f64) -> f64, 82 | { 83 | let xm = 0.5 * (b + a); 84 | let xr = 0.5 * (b - a); 85 | (0..5) 86 | .map(|i| { 87 | let dx = xr * GAUSS_QUAD_NODES[i]; 88 | GAUSS_QUAD_WEIGHTS[i] * (f(xm + dx) + f(xm - dx)) 89 | }) 90 | .sum::() 91 | * xr 92 | } 93 | 94 | const GAUSS_QUAD_NODES: [f64; 5] = [ 95 | 0.1488743389816312, 96 | 0.4333953941292472, 97 | 0.6794095682990244, 98 | 0.8650633666889845, 99 | 0.9739065285171717, 100 | ]; 101 | const GAUSS_QUAD_WEIGHTS: [f64; 5] = [ 102 | 0.2955242247147529, 103 | 0.2692667193099963, 104 | 0.2190863625159821, 105 | 0.1494513491505806, 106 | 0.0666713443086881, 107 | ]; 108 | 109 | #[cfg(test)] 110 | mod tests { 111 | use super::*; 112 | use approx_eq::assert_approx_eq; 113 | use std::f64::consts::PI; 114 | 115 | #[test] 116 | pub fn test_integrators() { 117 | let f1 = |x: f64| x * (1. + 2. * x).sqrt(); 118 | assert_approx_eq!(trapz(f1, 4., 0., 1000), -298. / 15., 1e-2); 119 | assert_approx_eq!(romberg(f1, 4., 0., 1e-8, 20), -298. / 15.); 120 | 121 | let f2 = |x: f64| x.sin().powi(2) * x.cos().powi(2); 122 | assert_approx_eq!(trapz(f2, -2., 2., 1000), (8. - 8_f64.sin()) / 16., 1e-2); 123 | assert_approx_eq!(romberg(f2, -2., 2., 1e-8, 20), (8. - 8_f64.sin()) / 16.); 124 | 125 | let f3 = |x: f64| x.ln() / x; 126 | assert_approx_eq!( 127 | trapz(f3, 3., 6., 1000), 128 | 0.5 * 2_f64.ln() * 18_f64.ln(), 129 | 1e-2 130 | ); 131 | assert_approx_eq!( 132 | romberg(f3, 3., 6., 1e-8, 10), 133 | 0.5 * 2_f64.ln() * 18_f64.ln() 134 | ); 135 | 136 | let f4 = |x: f64| x.sin().powi(3) * x.cos(); 137 | assert_approx_eq!(trapz(f4, 0., PI / 3., 1000), 9. / 64., 1e-2); 138 | assert_approx_eq!(romberg(f4, 0., PI / 3., 1e-8, 20), 9. / 64.); 139 | 140 | let f5 = |x: f64| 1. / (3. 
* x - 7.).powi(2); 141 | assert_approx_eq!(trapz(f5, 3., 4., 1000), 0.1, 1e-2); 142 | assert_approx_eq!(romberg(f5, 3., 4., 1e-8, 20), 0.1); 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /src/integrate/mod.rs: -------------------------------------------------------------------------------- 1 | //! Algorithms for integrating functions and ODEs. 2 | 3 | mod functions; 4 | mod samples; 5 | // mod odes; 6 | 7 | pub use functions::*; 8 | pub use samples::*; 9 | // pub use odes::*; 10 | -------------------------------------------------------------------------------- /src/integrate/odes.rs: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/integrate/samples.rs: -------------------------------------------------------------------------------- 1 | use crate::linalg::Vector; 2 | 3 | /// Integrate samples from a function 4 | pub fn trapezoid(y: &[f64], x: Option<&[f64]>, dx: Option) -> f64 { 5 | let diff_x = if let Some(xarr) = x { 6 | assert_eq!(y.len(), xarr.len(), "x and y must have the same length."); 7 | assert!(dx.is_none(), "Since x was passed, dx must be None"); 8 | (1..xarr.len()) 9 | .map(|i| xarr[i] - xarr[i - 1]) 10 | .collect::() 11 | } else { 12 | Vector::ones(y.len() - 1) * if let Some(diff) = dx { diff } else { 1. } 13 | }; 14 | 15 | (1..y.len()) 16 | .map(|i| (y[i] + y[i - 1]) / 2. 
* diff_x[i - 1]) 17 | .sum() 18 | } 19 | 20 | #[cfg(test)] 21 | mod test { 22 | use approx_eq::assert_approx_eq; 23 | 24 | use super::*; 25 | 26 | #[test] 27 | fn test_trapezoid() { 28 | let x = vec![1., 2., 3., 5., 6., 7.]; 29 | let y = vec![2., 4., 5., 6., 5., 2.]; 30 | let int = trapezoid(&y, Some(&x), None); 31 | assert_approx_eq!(int, 27.5); 32 | 33 | let x = vec![ 34 | -1.2492823978867575, 35 | -1.0827721123898908, 36 | -0.9406043223301596, 37 | -0.7680308246853681, 38 | -0.4229503089687044, 39 | 0.015055841196579461, 40 | 0.2583590719224359, 41 | 0.6164607574036753, 42 | 0.8938966780618812, 43 | 1.6356468475989316, 44 | ]; 45 | 46 | let y = vec![ 47 | -1.03464106, 48 | 0.51438613, 49 | 0.1316389, 50 | -0.53705463, 51 | -1.18204451, 52 | -1.12118844, 53 | 1.08181116, 54 | -1.0660365, 55 | -1.5044052, 56 | -0.58971003, 57 | ]; 58 | 59 | let int = trapezoid(&y, Some(&x), None); 60 | assert_approx_eq!(int, -1.9685902719500574); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::many_single_char_names)] 2 | pub mod distributions; 3 | pub mod functions; 4 | pub mod integrate; 5 | pub mod linalg; 6 | pub mod optimize; 7 | pub mod predict; 8 | pub mod prelude; 9 | pub mod statistics; 10 | pub mod timeseries; 11 | pub mod validation; 12 | -------------------------------------------------------------------------------- /src/linalg/array/dot.rs: -------------------------------------------------------------------------------- 1 | //! Implementation of matrix products for vectors and matrices. 2 | 3 | use super::{ 4 | super::{dot, matmul}, 5 | Matrix, Vector, 6 | }; 7 | 8 | /// A trait for performing matrix products. Follows the behaviour of numpy's `matmul`. 9 | /// 10 | /// If multiplying two matrices, performs conventional matrix multiplication. 
11 | /// If multiplying a vector with a matrix, promotes the vector to a matrix by prepending a 1 to its dimensions, then performing conventional matrix multiplication, and then flattening the result back down into a vector. 12 | /// If multiplying a matrix with a vector, promotes the vector to a matrix by appending a 1 to its dimensions, then performing conventional matrix multiplication, and then flattening the result back down into a vector. 13 | pub trait Dot { 14 | /// Performs dot product of self and other. 15 | fn dot(&self, other: T) -> S; 16 | /// Performs dot product of self and other, transposing other. 17 | fn dot_t(&self, other: T) -> S; 18 | /// Performs dot product of self and other, transposing self. 19 | fn t_dot(&self, other: T) -> S; 20 | /// Performs dot product of self and other, transposing both self and other. 21 | fn t_dot_t(&self, other: T) -> S; 22 | } 23 | 24 | macro_rules! impl_macro_for_types { 25 | ($macro: ident, $t1: ty, $t2: ty) => { 26 | $macro!($t1, $t2); 27 | $macro!($t1, &$t2); 28 | $macro!(&$t1, $t2); 29 | $macro!(&$t1, &$t2); 30 | }; 31 | } 32 | 33 | macro_rules! 
impl_mat_mat_dot { 34 | ($selftype: ty, $othertype: ty) => { 35 | impl Dot<$othertype, Matrix> for $selftype { 36 | fn dot(&self, other: $othertype) -> Matrix { 37 | assert_eq!(self.ncols, other.nrows, "matrix shapes not compatible"); 38 | let output = matmul( 39 | &self.data(), 40 | &other.data(), 41 | self.nrows, 42 | other.nrows, 43 | false, 44 | false, 45 | ); 46 | Matrix::new(output, self.nrows as i32, other.ncols as i32) 47 | } 48 | 49 | fn t_dot(&self, other: $othertype) -> Matrix { 50 | assert_eq!(self.nrows, other.nrows, "matrix shapes not compatible"); 51 | let output = matmul( 52 | &self.data(), 53 | &other.data(), 54 | self.nrows, 55 | other.nrows, 56 | true, 57 | false, 58 | ); 59 | Matrix::new(output, self.ncols as i32, other.ncols as i32) 60 | } 61 | fn dot_t(&self, other: $othertype) -> Matrix { 62 | assert_eq!(self.ncols, other.ncols, "matrix shapes not compatible"); 63 | let output = matmul( 64 | &self.data(), 65 | &other.data(), 66 | self.nrows, 67 | other.nrows, 68 | false, 69 | true, 70 | ); 71 | Matrix::new(output, self.nrows as i32, other.nrows as i32) 72 | } 73 | fn t_dot_t(&self, other: $othertype) -> Matrix { 74 | assert_eq!(self.nrows, other.ncols, "matrix shapes not compatible"); 75 | let output = matmul( 76 | &self.data(), 77 | &other.data(), 78 | self.nrows, 79 | other.nrows, 80 | true, 81 | true, 82 | ); 83 | Matrix::new(output, self.ncols as i32, other.nrows as i32) 84 | } 85 | } 86 | }; 87 | } 88 | 89 | impl_macro_for_types!(impl_mat_mat_dot, Matrix, Matrix); 90 | 91 | macro_rules! impl_dot_append_one { 92 | ($othertype: ty, $innerop: ident, $($op: ident),+) => { 93 | $( 94 | fn $op(&self, other: $othertype) -> Vector { 95 | let mut o = other.clone().to_owned().to_matrix(); 96 | o.t_mut(); 97 | self.$innerop(o).to_vec() 98 | } 99 | )+ 100 | } 101 | } 102 | 103 | macro_rules! 
impl_mat_vec_dot { 104 | ($selftype: ty, $othertype: ty) => { 105 | impl Dot<$othertype, Vector> for $selftype { 106 | // transpose on the vector does nothing 107 | impl_dot_append_one!($othertype, dot, dot, dot_t); 108 | impl_dot_append_one!($othertype, t_dot, t_dot, t_dot_t); 109 | } 110 | }; 111 | } 112 | 113 | impl_macro_for_types!(impl_mat_vec_dot, Matrix, Vector); 114 | 115 | macro_rules! impl_dot_prepend_one { 116 | ($othertype: ty, $innerop: ident, $($op: ident),+) => { 117 | $( 118 | fn $op(&self, other: $othertype) -> Vector { 119 | self.clone().to_owned().to_matrix().$innerop(other).to_vec() 120 | } 121 | )+ 122 | } 123 | } 124 | 125 | macro_rules! impl_vec_mat_dot { 126 | ($selftype: ty, $othertype: ty) => { 127 | impl Dot<$othertype, Vector> for $selftype { 128 | // transpose on the vector does nothing 129 | impl_dot_prepend_one!($othertype, dot, dot, t_dot); 130 | impl_dot_prepend_one!($othertype, dot_t, dot_t, t_dot_t); 131 | } 132 | }; 133 | } 134 | 135 | impl_macro_for_types!(impl_vec_mat_dot, Vector, Matrix); 136 | 137 | macro_rules! impl_dot_vec_vec { 138 | ($othertype: ty, $($op: ident),+) => { 139 | $( 140 | fn $op(&self, other: $othertype) -> f64 { 141 | dot(&self.data(), &other.data()) 142 | } 143 | )+ 144 | } 145 | } 146 | 147 | macro_rules! impl_vec_vec_dot { 148 | ($selftype: ty, $othertype: ty) => { 149 | impl Dot<$othertype, f64> for $selftype { 150 | impl_dot_vec_vec!($othertype, dot, t_dot, dot_t, t_dot_t); 151 | } 152 | }; 153 | } 154 | 155 | impl_macro_for_types!(impl_vec_vec_dot, Vector, Vector); 156 | -------------------------------------------------------------------------------- /src/linalg/array/mod.rs: -------------------------------------------------------------------------------- 1 | //! Vector and matrix structs and supporting functionality. 
2 | 3 | mod broadcast; 4 | mod dot; 5 | mod matrix; 6 | mod vec; 7 | mod vops; 8 | 9 | pub use broadcast::*; 10 | pub use dot::*; 11 | pub use matrix::*; 12 | pub use vec::*; 13 | pub(crate) use vops::*; 14 | -------------------------------------------------------------------------------- /src/linalg/decomposition/cholesky.rs: -------------------------------------------------------------------------------- 1 | //! Implements [Cholesky decomposition](https://en.wikipedia.org/wiki/Cholesky_decomposition). 2 | 3 | use crate::linalg::{ 4 | backward_substitution, dot, forward_substitution, is_square, is_symmetric, transpose, 5 | }; 6 | 7 | /// Computes the Cholesky decomposition of the matrix `a` using the Cholesky-Banachiewicz 8 | /// algorithm. 9 | pub fn cholesky(a: &[f64]) -> Vec { 10 | assert!(is_symmetric(a)); 11 | let n = is_square(a).unwrap(); 12 | 13 | let mut l = vec![0.; n * n]; 14 | 15 | for i in 0..n { 16 | for j in 0..(i + 1) { 17 | let s = dot(&l[(j * n)..(j * n + j)], &l[(i * n)..(i * n + j)]); 18 | 19 | if i == j { 20 | l[i * n + j] = (a[i * n + i] - s).sqrt(); 21 | } else { 22 | l[i * n + j] = (a[i * n + j] - s) / l[j * n + j]; 23 | } 24 | } 25 | } 26 | 27 | l 28 | } 29 | 30 | /// Solves the system Lx=b, where L is a lower triangular matrix (e.g., a Cholesky decomposed 31 | /// matrix), and b is a one dimensional vector. 
32 | pub fn cholesky_solve(l: &[f64], b: &[f64]) -> Vec { 33 | let n = is_square(l).unwrap(); 34 | assert_eq!(b.len(), n, "sizes of L and b do not match up"); 35 | 36 | let y = forward_substitution(l, b); 37 | 38 | // back substitution 39 | let lt = transpose(l, n); 40 | backward_substitution(<, &y) 41 | } 42 | 43 | #[cfg(test)] 44 | mod tests { 45 | use super::*; 46 | use approx_eq::assert_approx_eq; 47 | 48 | #[test] 49 | fn test_cholesky() { 50 | let a1 = vec![ 51 | 6., 3., 4., 8., 3., 6., 5., 1., 4., 5., 10., 7., 8., 1., 7., 25., 52 | ]; 53 | let l1 = cholesky(&a1); 54 | let b1 = vec![ 55 | 2.449489742783178, 56 | 0.0, 57 | 0.0, 58 | 0.0, 59 | 1.2247448713915892, 60 | 2.1213203435596424, 61 | 0.0, 62 | 0.0, 63 | 1.6329931618554523, 64 | 1.414213562373095, 65 | 2.309401076758503, 66 | 0.0, 67 | 3.2659863237109046, 68 | -1.4142135623730956, 69 | 1.5877132402714704, 70 | 3.1324910215354165, 71 | ]; 72 | 73 | let a2 = vec![4., 12., -16., 12., 37., -43., -16., -43., 98.]; 74 | let l2 = cholesky(&a2); 75 | let b2 = vec![2., 0., 0., 6., 1., 0., -8., 5., 3.]; 76 | 77 | let a3 = vec![25., 15., -5., 15., 18., 0., -5., 0., 11.]; 78 | let l3 = cholesky(&a3); 79 | let b3 = vec![5., 0., 0., 3., 3., 0., -1., 1., 3.]; 80 | 81 | let l = [l1, l2, l3]; 82 | let b = [b1, b2, b3]; 83 | 84 | for i in 0..3 { 85 | for j in 0..l.len() { 86 | assert_approx_eq!(l[i][j], b[i][j], 1e-2); 87 | } 88 | } 89 | } 90 | 91 | #[test] 92 | fn test_cholesky_solve() { 93 | let a = vec![ 94 | 9., 3., 1., 5., 3., 7., 5., 1., 1., 5., 9., 2., 5., 1., 2., 6., 95 | ]; 96 | let l = cholesky(&a); 97 | let x = cholesky_solve(&l, &[1., 1., 1., 1.]); 98 | let sol = [-0.01749271, 0.11953353, 0.01166181, 0.1574344]; 99 | for i in 0..4 { 100 | assert_approx_eq!(x[i], sol[i], 1e-2); 101 | } 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/linalg/decomposition/lu.rs: -------------------------------------------------------------------------------- 1 | //! 
Implements [LU decomposition](https://en.wikipedia.org/wiki/LU_decomposition) and system solving with the decomposition. 2 | 3 | use std::cmp; 4 | 5 | // #[cfg(feature = "lapack")] 6 | // use lapack::dgetrf; 7 | 8 | use crate::linalg::is_square; 9 | 10 | /// Computes the pivoted LU decomposition of a square matrix. For some matrix A, this decomposition 11 | /// is A = PLU. The resulting matrix has U in its upper triangle and L in its lower triangle. 12 | /// The unit diagonal elements of L are not stored. The pivot indices representing the permutation 13 | /// matrix P is also returned. 14 | pub fn lu(matrix: &[f64]) -> (Vec, Vec) { 15 | let n = is_square(matrix).unwrap(); 16 | let mut lu = matrix.to_vec(); 17 | 18 | let mut pivots: Vec = (0..n).map(|x| x as i32).collect(); 19 | 20 | for j in 0..n { 21 | for i in 0..n { 22 | let mut s = 0.; 23 | for k in 0..cmp::min(i, j) { 24 | s += lu[i * n + k] * lu[k * n + j]; 25 | } 26 | lu[i * n + j] -= s; 27 | } 28 | 29 | let mut p = j; 30 | for i in (j + 1)..n { 31 | if lu[i * n + j].abs() > lu[p * n + j].abs() { 32 | p = i; 33 | } 34 | } 35 | 36 | if p != j { 37 | for k in 0..n { 38 | lu.swap(p * n + k, j * n + k) 39 | } 40 | pivots.swap(p, j); 41 | } 42 | 43 | if j < n && lu[j * n + j] != 0. { 44 | for i in (j + 1)..n { 45 | lu[i * n + j] /= lu[j * n + j]; 46 | } 47 | } 48 | } 49 | 50 | (lu, pivots) 51 | } 52 | 53 | /// Solve the linear system Ax = b given a LU decomposed matrix A. The first argument should be a 54 | /// tuple, where the first element is the LU decomposed matrix and the second element is the pivots 55 | /// P. 
/// Solve the linear system Ax = b given a pivoted LU decomposition of A, as produced by
/// `lu`: `lu` stores U in its upper triangle and the unit-lower-triangular L below the
/// diagonal, and `pivots` holds the row permutation P.
///
/// # Panics
/// Panics if `lu` is not `b.len() x b.len()` or if `pivots` does not have length `b.len()`.
pub fn lu_solve(lu: &[f64], pivots: &[i32], b: &[f64]) -> Vec<f64> {
    let n = b.len();
    assert!(lu.len() == n * n, "LU matrix must be n x n");
    // The original never validated the pivot vector; a short `pivots` silently
    // applied a partial permutation.
    assert_eq!(pivots.len(), n, "pivot vector must have length n");

    // Apply the row permutation: x = P b.
    let mut x = vec![0.; n];
    for i in 0..n {
        x[i] = b[pivots[i] as usize];
    }

    // Forward substitution with the unit lower triangle: L y = P b.
    for k in 0..n {
        for i in (k + 1)..n {
            x[i] -= x[k] * lu[i * n + k];
        }
    }

    // Backward substitution with the upper triangle: U x = y.
    for k in (0..n).rev() {
        x[k] /= lu[k * n + k];
        for i in 0..k {
            x[i] -= x[k] * lu[i * n + k];
        }
    }

    x
}
pub use lu::*; 7 | pub use substitution::*; 8 | -------------------------------------------------------------------------------- /src/linalg/decomposition/substitution.rs: -------------------------------------------------------------------------------- 1 | //! Forward and backward substitution. 2 | 3 | use crate::linalg::{dot, is_square}; 4 | 5 | /// Solve a matrix equation of the form Lx=b, where L is a lower triangular matrix. 6 | /// See the [Wikipedia page](https://en.wikipedia.org/wiki/Triangular_matrix#Forward_and_back_substitution). 7 | pub fn forward_substitution(l: &[f64], b: &[f64]) -> Vec { 8 | let n = is_square(l).unwrap(); 9 | assert_eq!(b.len(), n); 10 | // let mut x = vec![0.; n]; 11 | let mut x = Vec::with_capacity(n); 12 | unsafe { 13 | x.set_len(n); 14 | } 15 | for i in 0..n { 16 | x[i] = (b[i] - dot(&l[(i * n)..(i * n + i)], &x[..i])) / l[i * n + i]; 17 | } 18 | x 19 | } 20 | 21 | /// Solve a matrix equation of the form Ux=b, where U is an upper triangular matrix. 22 | /// See the [Wikipedia page](https://en.wikipedia.org/wiki/Triangular_matrix#Forward_and_back_substitution). 23 | pub fn backward_substitution(u: &[f64], b: &[f64]) -> Vec { 24 | let n = is_square(u).unwrap(); 25 | assert_eq!(b.len(), n); 26 | let mut x = Vec::with_capacity(n); 27 | unsafe { 28 | x.set_len(n); 29 | } 30 | for i in (0..n).rev() { 31 | x[i] = (b[i] - dot(&u[(i * n + i + 1)..(i * n + n)], &x[i + 1..])) / u[i * n + i]; 32 | } 33 | x 34 | } 35 | -------------------------------------------------------------------------------- /src/linalg/mod.rs: -------------------------------------------------------------------------------- 1 | //! Provides general linear algebra methods and matrix decompositions with a focus on low-dimensional data. 
2 | 3 | mod array; 4 | mod decomposition; 5 | mod rotations; 6 | mod utils; 7 | 8 | pub use array::*; 9 | pub use decomposition::*; 10 | pub use rotations::*; 11 | pub use utils::*; 12 | -------------------------------------------------------------------------------- /src/linalg/rotations.rs: -------------------------------------------------------------------------------- 1 | /// Utilities for 3D rotations. 2 | use super::Matrix; 3 | 4 | pub enum Axis { 5 | X, 6 | Y, 7 | Z, 8 | } 9 | 10 | /// Returns a 3D clockwise rotation matrix along the `axis` axis (either X, Y, or Z) with a given angle in 11 | /// radians. 12 | /// 13 | /// # Remarks 14 | /// These are clockwise rotation matrices. Use `rotation_matrix_ccw` to get counter-clockwise rotation 15 | /// matrices. 16 | pub fn rotation_matrix_cw(angle: f64, axis: Axis) -> Matrix { 17 | let data = match axis { 18 | Axis::X => [ 19 | 1., 20 | 0., 21 | 0., 22 | 0., 23 | angle.cos(), 24 | angle.sin(), 25 | 0., 26 | -angle.sin(), 27 | angle.cos(), 28 | ], 29 | Axis::Y => [ 30 | angle.cos(), 31 | 0., 32 | -angle.sin(), 33 | 0., 34 | 1., 35 | 0., 36 | angle.sin(), 37 | 0., 38 | angle.cos(), 39 | ], 40 | Axis::Z => [ 41 | angle.cos(), 42 | angle.sin(), 43 | 0., 44 | -angle.sin(), 45 | angle.cos(), 46 | 0., 47 | 0., 48 | 0., 49 | 1., 50 | ], 51 | }; 52 | Matrix::new(data, 3, 3) 53 | } 54 | 55 | /// Returns a 3D counter-clockwise rotation matrix along the `axis` axis (either X, Y, or Z) with a given angle in 56 | /// radians. 57 | /// 58 | /// # Remarks 59 | /// These are counter-clockwise rotation matrices. Use `rotation_matrix_cw` to get clockwise rotation 60 | /// matrices. 
61 | pub fn rotation_matrix_ccw(angle: f64, axis: Axis) -> Matrix { 62 | let data = match axis { 63 | Axis::X => [ 64 | 1., 65 | 0., 66 | 0., 67 | 0., 68 | angle.cos(), 69 | -angle.sin(), 70 | 0., 71 | angle.sin(), 72 | angle.cos(), 73 | ], 74 | Axis::Y => [ 75 | angle.cos(), 76 | 0., 77 | angle.sin(), 78 | 0., 79 | 1., 80 | 0., 81 | -angle.sin(), 82 | 0., 83 | angle.cos(), 84 | ], 85 | Axis::Z => [ 86 | angle.cos(), 87 | -angle.sin(), 88 | 0., 89 | angle.sin(), 90 | angle.cos(), 91 | 0., 92 | 0., 93 | 0., 94 | 1., 95 | ], 96 | }; 97 | Matrix::new(data, 3, 3) 98 | } 99 | -------------------------------------------------------------------------------- /src/optimize/adam.rs: -------------------------------------------------------------------------------- 1 | use super::Optimizer; 2 | use crate::prelude::Vector; 3 | use approx_eq::rel_diff; 4 | use reverse::*; 5 | 6 | /// Implements the Adam optimizer. See [Kingma and Ba 2014](https://arxiv.org/abs/1412.6980) for 7 | /// details about the algorithm. 8 | /// 9 | /// # Examples 10 | /// 11 | /// ```rust 12 | /// // optimize the [Rosenbrock function](https://en.wikipedia.org/wiki/Rosenbrock_function) 13 | /// // with fixed parameters `a = 1` and `b = 100`. 
14 | /// // the minimum value is at (1, 1), which is what we will try to recover 15 | /// 16 | /// use compute::optimize::*; 17 | /// use approx_eq::assert_approx_eq; 18 | /// use reverse::Var; 19 | /// 20 | /// fn rosenbrock<'a>(p: &[Var<'a>], d: &[&[f64]]) -> Var<'a> { 21 | /// assert_eq!(p.len(), 2); 22 | /// assert_eq!(d.len(), 1); 23 | /// assert_eq!(d[0].len(), 2); 24 | /// 25 | /// let (x, y) = (p[0], p[1]); 26 | /// let (a, b) = (d[0][0], d[0][1]); 27 | /// 28 | /// (a - x).powi(2) + b * (y - x.powi(2)).powi(2) 29 | /// } 30 | /// 31 | /// let init = [0., 0.]; 32 | /// let optim = Adam::with_stepsize(5e-4); 33 | /// let popt = optim.optimize(rosenbrock, &init, &[&[1., 100.]], 10000); 34 | /// 35 | /// assert_approx_eq!(popt[0], 1.); 36 | /// assert_approx_eq!(popt[1], 1.); 37 | /// ``` 38 | #[derive(Debug, Clone)] 39 | pub struct Adam { 40 | stepsize: f64, // step size 41 | beta1: f64, // exponential decay rate for first moment 42 | beta2: f64, // exponential decay rate for second moment 43 | epsilon: f64, // small number to prevent division by zero 44 | tape: Tape, // tape for gradients 45 | } 46 | 47 | impl Adam { 48 | /// Create a new Adam optimizer. 
49 | /// stepsize: step size 50 | /// beta1: exponential decay rate for first moment 51 | /// beta2: exponential decay rate for second moment 52 | /// epsilon: small number to prevent division by zero 53 | pub fn new(stepsize: f64, beta1: f64, beta2: f64, epsilon: f64) -> Self { 54 | assert!(beta1 > 0., "beta1 must be positive"); 55 | assert!(beta2 > 0., "beta2 must be positive"); 56 | Adam { 57 | stepsize, 58 | beta1, 59 | beta2, 60 | epsilon, 61 | tape: Tape::new(), 62 | } 63 | } 64 | pub fn set_stepsize(&mut self, stepsize: f64) { 65 | self.stepsize = stepsize; 66 | } 67 | pub fn with_stepsize(stepsize: f64) -> Self { 68 | let mut adam = Self::default(); 69 | adam.set_stepsize(stepsize); 70 | adam 71 | } 72 | } 73 | 74 | impl Default for Adam { 75 | /// Uses the defaults recommended by Kingma and Ba 2014 76 | fn default() -> Self { 77 | Self::new(0.001, 0.9, 0.999, 1e-8) 78 | } 79 | } 80 | 81 | impl Optimizer for Adam { 82 | type Output = Vector; 83 | /// Run the optimization algorithm, given a vector of parameters to optimize and a function which calculates the residuals. 84 | fn optimize( 85 | &self, 86 | f: F, 87 | parameters: &[f64], 88 | data: &[&[f64]], 89 | maxsteps: usize, 90 | ) -> Self::Output 91 | where 92 | F: for<'a> Fn(&[Var<'a>], &[&[f64]]) -> Var<'a>, 93 | { 94 | self.tape.clear(); 95 | let mut params = parameters 96 | .iter() 97 | .map(|&x| self.tape.add_var(x)) 98 | .collect::>(); 99 | let param_len = params.len(); 100 | 101 | let mut t: usize = 0; 102 | let mut m = Vector::zeros(param_len); 103 | let mut v = Vector::zeros(param_len); 104 | let mut converged = false; 105 | 106 | while t < maxsteps && !converged { 107 | t += 1; 108 | 109 | let prev_params = params.clone(); 110 | 111 | let res = f(¶ms, data); 112 | 113 | eprintln!("t = {:?}, res = {}", t, res.val()); 114 | 115 | let grad = res.grad().wrt(¶ms); 116 | 117 | for p in 0..param_len { 118 | m[p] = self.beta1 * m[p] + (1. 
- self.beta1) * grad[p]; // biased first moment estimate 119 | v[p] = self.beta2 * v[p] + (1. - self.beta2) * grad[p] * grad[p]; // biased second moment estimate 120 | let mhat = m[p] / (1. - self.beta1.powi(t as i32)); // bias-corrected first moment estimate 121 | let vhat = v[p] / (1. - self.beta2.powi(t as i32)); // bias-corrected second moment estimate 122 | params[p] = params[p] - self.stepsize * mhat / (vhat.sqrt() + self.epsilon); 123 | } 124 | 125 | if crate::statistics::max( 126 | &(0..param_len) 127 | .map(|i| rel_diff(params[i].val(), prev_params[i].val())) 128 | .collect::>(), 129 | ) < f64::EPSILON 130 | { 131 | converged = true; 132 | } 133 | 134 | // clear gradients and intermediate variables 135 | self.tape.clear(); 136 | params = params 137 | .iter() 138 | .map(|&x| self.tape.add_var(x.val())) 139 | .collect::>(); 140 | } 141 | 142 | Vector::new(params.iter().map(|x| x.val()).collect::>()) 143 | } 144 | } 145 | 146 | #[cfg(test)] 147 | mod tests { 148 | use super::*; 149 | use approx_eq::assert_approx_eq; 150 | 151 | #[test] 152 | fn test_adam_slr() { 153 | let x = vec![ 154 | 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 155 | 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 156 | 36., 37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., 51., 52., 157 | 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., 66., 67., 68., 69., 158 | 70., 71., 72., 73., 74., 75., 76., 77., 78., 79., 80., 81., 82., 83., 84., 85., 86., 159 | 87., 88., 89., 90., 91., 92., 93., 94., 95., 96., 97., 98., 99., 160 | ]; 161 | // actual coefficients 162 | let coeffs = vec![5., -2.5]; 163 | let y = [ 164 | 5., 2.5, 0., -2.5, -5., -7.5, -10., -12.5, -15., -17.5, -20., -22.5, -25., -27.5, -30., 165 | -32.5, -35., -37.5, -40., -42.5, -45., -47.5, -50., -52.5, -55., -57.5, -60., -62.5, 166 | -65., -67.5, -70., -72.5, -75., -77.5, -80., -82.5, -85., -87.5, -90., -92.5, -95., 167 
| -97.5, -100., -102.5, -105., -107.5, -110., -112.5, -115., -117.5, -120., -122.5, 168 | -125., -127.5, -130., -132.5, -135., -137.5, -140., -142.5, -145., -147.5, -150., 169 | -152.5, -155., -157.5, -160., -162.5, -165., -167.5, -170., -172.5, -175., -177.5, 170 | -180., -182.5, -185., -187.5, -190., -192.5, -195., -197.5, -200., -202.5, -205., 171 | -207.5, -210., -212.5, -215., -217.5, -220., -222.5, -225., -227.5, -230., -232.5, 172 | -235., -237.5, -240., -242.5, 173 | ]; 174 | 175 | fn fn_resid<'a>(params: &[Var<'a>], data: &[&[f64]]) -> Var<'a> { 176 | let (x, y) = (data[0], data[1]); 177 | x.iter() 178 | .zip(y) 179 | .map(|(&xv, &yv)| ((params[0] + xv * params[1]) - yv).powi(2)) 180 | .sum() 181 | } 182 | 183 | let mut optim = Adam::default(); 184 | optim.set_stepsize(0.1); 185 | let est_params = optim.optimize(fn_resid, &[1., 1.], &[&x, &y], 5000); 186 | 187 | for i in 0..2 { 188 | assert_approx_eq!(est_params[i], coeffs[i], 0.01); 189 | } 190 | } 191 | 192 | #[test] 193 | // #[ignore] 194 | fn test_adam_rosenbrock() { 195 | // optimize the [Rosenbrock function](https://en.wikipedia.org/wiki/Rosenbrock_function) 196 | // with fixed parameters `a = 1` and `b = 100`. 
197 | // the minimum value is at (1, 1), which is what we will try to recover 198 | 199 | fn rosenbrock<'a>(p: &[Var<'a>], d: &[&[f64]]) -> Var<'a> { 200 | assert_eq!(p.len(), 2); 201 | assert_eq!(d.len(), 1); 202 | assert_eq!(d[0].len(), 2); 203 | 204 | let (x, y) = (p[0], p[1]); 205 | let (a, b) = (d[0][0], d[0][1]); 206 | 207 | (a - x).powi(2) + b * (y - x.powi(2)).powi(2) 208 | } 209 | 210 | let init = [0., 0.]; 211 | let optim = Adam::with_stepsize(5e-4); 212 | let popt = optim.optimize(rosenbrock, &init, &[&[1., 100.]], 10000); 213 | 214 | assert_approx_eq!(popt[0], 1.); 215 | assert_approx_eq!(popt[1], 1.); 216 | } 217 | } 218 | -------------------------------------------------------------------------------- /src/optimize/lbfgs.rs: -------------------------------------------------------------------------------- 1 | #[derive(Debug, Clone, Copy)] 2 | pub struct LBFGS {} 3 | 4 | impl LBFGS { 5 | fn linesearch() {} 6 | 7 | fn twoloop() {} 8 | 9 | fn optimize() {} 10 | } 11 | 12 | #[cfg(test)] 13 | mod tests { 14 | use crate::prelude::{Float, F1}; 15 | use std::f64::consts::PI; 16 | 17 | #[test] 18 | fn test_lbfgs() { 19 | fn loglikelihood(params: &[F1], data: &[&[f64]]) -> F1 { 20 | assert_eq!(params.len(), 3); 21 | assert_eq!(data.len(), 2); 22 | assert_eq!(data[0].len(), data[1].len()); 23 | let (b, m, sigma) = (params[0], params[1], params[2]); 24 | let n = data[0].len() as f64; 25 | let mu: Vec = data[0].iter().map(|&v| m * v + b).collect(); 26 | let ymusqsig: F1 = data[1] 27 | .iter() 28 | .zip(mu) 29 | .map(|(yv, muv)| (*yv - muv).powi(2) / (2. * sigma.powi(2))) 30 | .sum(); 31 | return -n / 2. * (2. 
* PI * sigma.powi(2)) - ymusqsig; 32 | } 33 | 34 | dbg!(loglikelihood( 35 | &[2., 3., 1.].iter().map(|x| F1::var(*x)).collect::>(), 36 | &[&[1., 2.], &[2., 4.]] 37 | )); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/optimize/lm.rs: -------------------------------------------------------------------------------- 1 | use super::Optimizer; 2 | use crate::linalg::{Dot, Matrix, Solve, Vector}; 3 | use reverse::*; 4 | 5 | /// Implements a [Levenberg-Marquardt optimizer](https://en.wikipedia.org/wiki/Levenberg%E2%80%93Marquardt_algorithm) 6 | /// for solving (non-linear) least squares problems. 7 | /// 8 | /// # Example 9 | /// 10 | /// ```rust 11 | /// use compute::prelude::{LM, Optimizer, Vector}; 12 | /// use reverse::*; 13 | /// 14 | /// // pairs of points (x_i, y_i) 15 | /// let x = Vector::from([1., 2., 3., 4., 5., 6., 7., 8., 9.]); 16 | /// let y = Vector::from([11., 22., 33., 44., 55., 66., 77., 88., 99.]); 17 | /// 18 | /// // define a function to optimize: 19 | /// // f(parameters, data) where parameters are parameters to optimize 20 | /// // and data are data to be used in the function 21 | /// 22 | /// fn equation_line<'a>(params: &[Var<'a>], data: &[&[f64]]) -> Var<'a> { 23 | /// assert!(data.len() == 1); 24 | /// assert!(data[0].len() == 1); 25 | /// assert!(params.len() == 2); 26 | /// 27 | /// return data[0][0] * params[0] + params[1]; 28 | /// } 29 | /// 30 | /// // create an instance of the optimizer 31 | /// let lm = LM::default(); 32 | /// 33 | /// // initial parameters (guess) 34 | /// let params = [1., 2.]; 35 | /// 36 | /// // run for max of 50 steps and find the best parameters 37 | /// // and the associated estimated covariance matrix. 38 | /// // the standard deviations of the parameters can be obtained from the 39 | /// // square root of the diagonal elements of the covariance matrix. 
40 | /// let (popt, pcov) = lm.optimize(equation_line, ¶ms, &[&x, &y], 50); 41 | /// let perr = pcov.diag().sqrt(); 42 | /// 43 | /// println!("{}", popt); 44 | /// println!("{}", pcov); 45 | /// 46 | /// assert!((popt[0] - 11.).abs() < 0.01); 47 | /// assert!((popt[1] - 0.).abs() < 0.01); 48 | /// ``` 49 | 50 | #[derive(Debug, Clone)] 51 | pub struct LM { 52 | pub eps1: f64, // tolerance for norm of residuals 53 | pub eps2: f64, // tolerance for change in parameters 54 | pub tau: f64, // initial scaling for damping factor 55 | tape: Tape, // tape for computing gradients 56 | } 57 | 58 | impl Default for LM { 59 | fn default() -> Self { 60 | LM { 61 | eps1: 1e-6, 62 | eps2: 1e-6, 63 | tau: 1e-2, 64 | tape: Tape::new(), 65 | } 66 | } 67 | } 68 | 69 | impl LM { 70 | /// Create a new Levenberg-Marquardt optimizer. 71 | pub fn new(eps1: f64, eps2: f64, tau: f64) -> Self { 72 | LM { 73 | eps1, 74 | eps2, 75 | tau, 76 | tape: Tape::new(), 77 | } 78 | } 79 | } 80 | 81 | impl Optimizer for LM { 82 | type Output = (Vector, Matrix); 83 | fn optimize( 84 | &self, 85 | f: F, 86 | parameters: &[f64], 87 | data: &[&[f64]], 88 | maxsteps: usize, 89 | ) -> (Vector, Matrix) 90 | where 91 | F: for<'a> Fn(&[Var<'a>], &[&[f64]]) -> Var<'a>, 92 | { 93 | self.tape.clear(); 94 | let mut params = parameters 95 | .into_iter() 96 | .copied() 97 | .map(|x| self.tape.add_var(x)) 98 | .collect::>(); 99 | 100 | let param_len = params.len(); 101 | assert!(data.len() == 2, "data must contain two slices (x and y)"); 102 | let (xs, ys) = (data[0], data[1]); 103 | assert_eq!(xs.len(), ys.len(), "x and y must have the same length"); 104 | let n = xs.len(); 105 | 106 | let (mut res, grad): (Vector, Vec) = xs 107 | .iter() 108 | .zip(ys) 109 | .map(|(&x, &y)| { 110 | let val = f(¶ms, &[&[x]]); 111 | ((y - val).val(), Vector::from(val.grad().wrt(¶ms))) 112 | }) 113 | .unzip(); 114 | 115 | let mut jacobian = Matrix::new( 116 | grad.into_iter().flatten().collect::(), 117 | n as i32, 118 | param_len as 
i32, 119 | ); 120 | 121 | let mut jtj = jacobian.t_dot(&jacobian); 122 | let mut jtr = jacobian.t_dot(&res).to_matrix(); 123 | 124 | let mut step = 0; 125 | let mut mu = self.tau * jtj.diag().max(); 126 | let mut nu = 2.; 127 | 128 | let mut stop = jtr.inf_norm() <= self.eps1; 129 | 130 | loop { 131 | step += 1; 132 | if step > maxsteps || stop { 133 | break; 134 | } 135 | 136 | // apply adaptive damping parameter 137 | let mut damped = jtj.clone(); 138 | for i in 0..param_len { 139 | damped[[i, i]] += mu * jtj[[i, i]]; 140 | } 141 | 142 | let delta = damped.solve(jtr.data()); 143 | 144 | stop = delta.norm() 145 | <= self.eps2 146 | * (params.iter().map(|x| x.val()).collect::().norm() + self.eps2); 147 | if stop { 148 | break; 149 | } 150 | 151 | // calculations using new proposed parameters 152 | // let new_params: Vec = (0..param_len).map(|i| params[i] + delta[i]).collect(); 153 | let new_params = params 154 | .iter() 155 | .zip(&delta) 156 | .map(|(&x, &d)| x + d) 157 | .collect::>(); 158 | 159 | let new_res: Vector = xs 160 | .iter() 161 | .zip(ys) 162 | .map(|(&x, y)| { 163 | let val = f(&new_params, &[&[x]]).val(); 164 | y - val 165 | }) 166 | .collect(); 167 | 168 | let res_norm_sq = res.dot(&res); 169 | let new_res_norm_sq = new_res.dot(&new_res); 170 | 171 | let pred_reduction = delta.t_dot(mu * &delta + jtr.data()); 172 | 173 | // calculate the gain ratio (actual reduction in error over predicted reduction) 174 | let rho = (res_norm_sq - new_res_norm_sq) / (0.5 * pred_reduction); 175 | 176 | if rho > 0. 
{ 177 | // good step, accept the new parameters and update all variables 178 | params.copy_from_slice(&new_params); 179 | 180 | let new_grad = xs 181 | .iter() 182 | .map(|&x| { 183 | let res = f(&new_params, &[&[x]]); 184 | Vector::from(res.grad().wrt(&new_params)) 185 | }) 186 | .flatten() 187 | .collect::(); 188 | 189 | jacobian = Matrix::new(new_grad, n as i32, param_len as i32); 190 | 191 | jtj = jacobian.t_dot(&jacobian); 192 | jtr = jacobian.t_dot(&new_res).to_matrix(); 193 | res = new_res; 194 | stop = jtr.inf_norm() <= self.eps1; 195 | if stop { 196 | break; 197 | } 198 | // adjust damping factor 199 | mu = f64::max(1. / 3., 1. - (2. * rho - 1.).powi(3)); 200 | nu = 2.; 201 | } else { 202 | // increase damping factor and try again with same parameters 203 | mu *= nu; 204 | nu *= 2.; 205 | } 206 | 207 | // clear gradients and intermediate variables 208 | self.tape.clear(); 209 | params = params 210 | .iter() 211 | .map(|x| self.tape.add_var(x.val())) 212 | .collect::>(); 213 | } 214 | 215 | ( 216 | Vector::new(params.iter().map(|x| x.val()).collect::>()), 217 | res.t_dot(&res) / (n - param_len) as f64 * jtj.inv(), 218 | ) 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /src/optimize/mod.rs: -------------------------------------------------------------------------------- 1 | //! Various optimization algorithms (eg. Adam, SGD, Levenberg-Marquardt). 
2 | 3 | mod adam; 4 | // mod lbfgs; 5 | mod lm; 6 | mod sgd; 7 | 8 | pub trait Optimizer { 9 | type Output; 10 | fn optimize( 11 | &self, 12 | f: F, 13 | parameters: &[f64], 14 | data: &[&[f64]], 15 | maxsteps: usize, 16 | ) -> Self::Output 17 | where 18 | F: for<'a> Fn(&[Var<'a>], &[&[f64]]) -> Var<'a>; 19 | } 20 | 21 | pub use self::adam::*; 22 | // pub use self::lbfgs::*; 23 | pub use self::lm::*; 24 | pub use self::sgd::*; 25 | // re-export reverse 26 | pub use reverse::*; 27 | -------------------------------------------------------------------------------- /src/optimize/optimizers/mod.rs: -------------------------------------------------------------------------------- 1 | //! Various optimization algorithms (eg. Adam, SGD, Levenberg-Marquardt). 2 | 3 | use super::DiffFn; 4 | 5 | mod adam; 6 | // mod lbfgs; 7 | mod lm; 8 | mod sgd; 9 | 10 | pub trait Optimizer { 11 | type Output; 12 | fn optimize( 13 | &self, 14 | f: F, 15 | parameters: &[f64], 16 | data: &[&[f64]], 17 | maxsteps: usize, 18 | ) -> Self::Output 19 | where 20 | F: DiffFn; 21 | } 22 | 23 | pub use self::adam::*; 24 | // pub use self::lbfgs::*; 25 | pub use self::lm::*; 26 | pub use self::sgd::*; 27 | -------------------------------------------------------------------------------- /src/optimize/sgd.rs: -------------------------------------------------------------------------------- 1 | use super::Optimizer; 2 | use crate::linalg::Vector; 3 | use approx_eq::rel_diff; 4 | use reverse::*; 5 | 6 | /// Implements the Stochastic Gradient Descent optimizer with (Nesterov) momentum. 7 | /// 8 | /// # Examples 9 | /// 10 | /// ```rust 11 | /// // optimize the [Rosenbrock function](https://en.wikipedia.org/wiki/Rosenbrock_function) 12 | /// // with fixed parameters `a = 1` and `b = 100`. 
13 | /// // the minimum value is at (1, 1), which is what we will try to recover 14 | /// 15 | /// use compute::optimize::*; 16 | /// use approx_eq::assert_approx_eq; 17 | /// use reverse::Var; 18 | /// 19 | /// fn rosenbrock<'a>(p: &[Var<'a>], d: &[&[f64]]) -> Var<'a> { 20 | /// assert_eq!(p.len(), 2); 21 | /// assert_eq!(d.len(), 1); 22 | /// assert_eq!(d[0].len(), 2); 23 | /// 24 | /// let (x, y) = (p[0], p[1]); 25 | /// let (a, b) = (d[0][0], d[0][1]); 26 | /// 27 | /// (a - x).powi(2) + b * (y - x.powi(2)).powi(2) 28 | /// } 29 | /// 30 | /// let init = [0., 0.]; 31 | /// let optim = SGD::new(1e-3, 0.9, true); 32 | /// let popt = optim.optimize(rosenbrock, &init, &[&[1., 100.]], 10000); 33 | /// 34 | /// assert_approx_eq!(popt[0], 1.); 35 | /// assert_approx_eq!(popt[1], 1.); 36 | /// ``` 37 | #[derive(Debug, Clone)] 38 | pub struct SGD { 39 | stepsize: f64, // step size 40 | momentum: f64, // momentum 41 | nesterov: bool, // whether to use Nesterov accelerated gradient 42 | tape: Tape, // tape for computing gradients 43 | } 44 | 45 | impl SGD { 46 | /// Create a new SGD optimizer. 47 | /// 48 | /// stepsize: step size 49 | /// momentum: momentum factor 50 | /// nesterov: whether to use Nesterov momentum 51 | pub fn new(stepsize: f64, momentum: f64, nesterov: bool) -> Self { 52 | Self { 53 | stepsize, 54 | momentum, 55 | nesterov, 56 | tape: Tape::new(), 57 | } 58 | } 59 | pub fn set_stepsize(&mut self, stepsize: f64) { 60 | assert!(stepsize > 0., "stepsize must be positive"); 61 | self.stepsize = stepsize; 62 | } 63 | } 64 | 65 | impl Default for SGD { 66 | fn default() -> Self { 67 | Self { 68 | stepsize: 1e-5, 69 | momentum: 0.9, 70 | nesterov: true, 71 | tape: Tape::new(), 72 | } 73 | } 74 | } 75 | 76 | impl Optimizer for SGD { 77 | type Output = Vector; 78 | /// Run the optimization algorithm, given a vector of parameters to optimize and a function which calculates the residuals. 
79 | fn optimize( 80 | &self, 81 | f: F, 82 | parameters: &[f64], 83 | data: &[&[f64]], 84 | maxsteps: usize, 85 | ) -> Self::Output 86 | where 87 | F: for<'a> Fn(&[Var<'a>], &[&[f64]]) -> Var<'a>, 88 | { 89 | self.tape.clear(); 90 | let param_len = parameters.len(); 91 | let mut params = parameters 92 | .iter() 93 | .map(|&x| self.tape.add_var(x)) 94 | .collect::>(); 95 | let mut update_vec = Vector::zeros(param_len); 96 | 97 | let mut t: usize = 0; 98 | let mut converged = false; 99 | 100 | while t < maxsteps && !converged { 101 | t += 1; 102 | let prev_params = params.clone(); 103 | 104 | let grad = if self.nesterov { 105 | let future_params = params 106 | .iter() 107 | .zip(&update_vec) 108 | .map(|(p, u)| *p - self.momentum * u) 109 | .collect::>(); 110 | let res = f(&future_params, data); 111 | eprintln!("t = {:?}, res = {}", t, res.val()); 112 | res.grad().wrt(&future_params) 113 | } else { 114 | f(¶ms, data).grad().wrt(¶ms) 115 | }; 116 | 117 | for p in 0..param_len { 118 | update_vec[p] = self.momentum * update_vec[p] + self.stepsize * grad[p]; 119 | params[p] = params[p] - update_vec[p] 120 | } 121 | // println!("{:?}", params); 122 | 123 | if crate::statistics::max( 124 | &(0..param_len) 125 | .map(|i| rel_diff(params[i].val(), prev_params[i].val())) 126 | .collect::>(), 127 | ) < f64::EPSILON 128 | { 129 | converged = true; 130 | } 131 | 132 | // clear gradients and intermediate variables 133 | self.tape.clear(); 134 | params = params 135 | .iter() 136 | .map(|&x| self.tape.add_var(x.val())) 137 | .collect::>(); 138 | } 139 | Vector::from(params.iter().map(|x| x.val()).collect::>()) 140 | } 141 | } 142 | 143 | #[cfg(test)] 144 | mod tests { 145 | use super::*; 146 | use approx_eq::assert_approx_eq; 147 | 148 | #[test] 149 | fn test_sgd_slr() { 150 | let x = vec![ 151 | 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 152 | 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 153 | 36., 
37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., 51., 52., 154 | 53., 54., 55., 56., 57., 58., 59., 60., 61., 62., 63., 64., 65., 66., 67., 68., 69., 155 | 70., 71., 72., 73., 74., 75., 76., 77., 78., 79., 80., 81., 82., 83., 84., 85., 86., 156 | 87., 88., 89., 90., 91., 92., 93., 94., 95., 96., 97., 98., 99., 157 | ]; 158 | // actual coefficients 159 | let coeffs = vec![5., -2.5]; 160 | let y = [ 161 | 5., 2.5, 0., -2.5, -5., -7.5, -10., -12.5, -15., -17.5, -20., -22.5, -25., -27.5, -30., 162 | -32.5, -35., -37.5, -40., -42.5, -45., -47.5, -50., -52.5, -55., -57.5, -60., -62.5, 163 | -65., -67.5, -70., -72.5, -75., -77.5, -80., -82.5, -85., -87.5, -90., -92.5, -95., 164 | -97.5, -100., -102.5, -105., -107.5, -110., -112.5, -115., -117.5, -120., -122.5, 165 | -125., -127.5, -130., -132.5, -135., -137.5, -140., -142.5, -145., -147.5, -150., 166 | -152.5, -155., -157.5, -160., -162.5, -165., -167.5, -170., -172.5, -175., -177.5, 167 | -180., -182.5, -185., -187.5, -190., -192.5, -195., -197.5, -200., -202.5, -205., 168 | -207.5, -210., -212.5, -215., -217.5, -220., -222.5, -225., -227.5, -230., -232.5, 169 | -235., -237.5, -240., -242.5, 170 | ]; 171 | 172 | fn fn_resid<'a>(params: &[Var<'a>], data: &[&[f64]]) -> Var<'a> { 173 | let (x, y) = (data[0], data[1]); 174 | x.iter() 175 | .zip(y) 176 | .map(|(&xv, &yv)| ((params[0] + xv * params[1]) - yv).powi(2)) 177 | .sum() 178 | } 179 | 180 | let optim = SGD::new(2e-6, 0.9, true); 181 | let est_params = optim.optimize(fn_resid, &[1., 1.], &[&x, &y], 10000); 182 | 183 | for i in 0..2 { 184 | assert_approx_eq!(est_params[i], coeffs[i], 0.01); 185 | } 186 | } 187 | } 188 | -------------------------------------------------------------------------------- /src/predict/glms/families.rs: -------------------------------------------------------------------------------- 1 | use crate::linalg::Vector; 2 | use crate::linalg::{norm, vmul, vsub}; 3 | 4 | /// An enum to represent the [exponential 5 | /// 
family](https://en.wikipedia.org/wiki/Exponential_family) set of distributions. These are 6 | /// intended for use with [GLM](../predict/struct.GLM.html). 7 | #[derive(Debug, Clone, Copy)] 8 | pub enum ExponentialFamily { 9 | Gaussian, 10 | Bernoulli, 11 | QuasiPoisson, 12 | Poisson, 13 | Gamma, 14 | Exponential, 15 | } 16 | 17 | impl ExponentialFamily { 18 | pub fn has_dispersion(&self) -> bool { 19 | match self { 20 | ExponentialFamily::Gaussian => true, 21 | ExponentialFamily::Bernoulli => false, 22 | ExponentialFamily::QuasiPoisson => true, 23 | ExponentialFamily::Poisson => false, 24 | ExponentialFamily::Gamma => true, 25 | ExponentialFamily::Exponential => false, 26 | } 27 | } 28 | 29 | pub fn variance(&self, mu: &[f64]) -> Vector { 30 | match self { 31 | ExponentialFamily::Gaussian => Vector::ones(mu.len()), 32 | ExponentialFamily::Bernoulli => { 33 | let m = Vector::from(mu); 34 | &m * (1. - &m) 35 | } 36 | ExponentialFamily::QuasiPoisson => Vector::from(mu), 37 | ExponentialFamily::Poisson => Vector::from(mu), 38 | ExponentialFamily::Gamma => Vector::from(vmul(&mu, &mu)), 39 | ExponentialFamily::Exponential => Vector::from(vmul(&mu, &mu)), 40 | } 41 | } 42 | 43 | pub fn inv_link(&self, eta: &[f64]) -> Vector { 44 | match self { 45 | ExponentialFamily::Gaussian => Vector::from(eta), 46 | ExponentialFamily::Bernoulli => { 47 | let e = Vector::from(eta); 48 | 1. / (1. + (-e).exp()) 49 | } 50 | ExponentialFamily::QuasiPoisson => Vector::from(eta).exp(), 51 | ExponentialFamily::Poisson => Vector::from(eta).exp(), 52 | ExponentialFamily::Gamma => Vector::from(eta).exp(), 53 | ExponentialFamily::Exponential => Vector::from(eta).exp(), 54 | } 55 | } 56 | 57 | pub fn d_inv_link(&self, eta: &[f64], mu: &[f64]) -> Vector { 58 | match self { 59 | ExponentialFamily::Gaussian => Vector::ones(eta.len()), 60 | ExponentialFamily::Bernoulli => { 61 | let m = Vector::from(mu); 62 | &m * (1. 
- &m) 63 | } 64 | ExponentialFamily::QuasiPoisson => Vector::from(mu), 65 | ExponentialFamily::Poisson => Vector::from(mu), 66 | ExponentialFamily::Gamma => Vector::from(mu), 67 | ExponentialFamily::Exponential => Vector::from(mu), 68 | } 69 | } 70 | 71 | pub fn deviance(&self, y: &[f64], mu: &[f64]) -> f64 { 72 | let n = y.len(); 73 | assert_eq!(n, mu.len()); 74 | match self { 75 | ExponentialFamily::Gaussian => norm(&vsub(y, mu)), 76 | ExponentialFamily::Bernoulli => { 77 | (0..n) 78 | .map(|i| y[i] * mu[i].ln() + (1. - y[i]) * (1. - mu[i]).ln()) 79 | .sum::() 80 | * -2. 81 | } 82 | ExponentialFamily::QuasiPoisson => { 83 | let ylogy = y 84 | .iter() 85 | .map(|x| if *x == 0. { 0. } else { x * x.ln() }) 86 | .collect::>(); 87 | 2. * (0..y.len()) 88 | .map(|i| mu[i] - y[i] - y[i] * mu[i].ln() + ylogy[i]) 89 | .sum::() 90 | } 91 | ExponentialFamily::Poisson => { 92 | let ylogy = y 93 | .iter() 94 | .map(|x| if *x == 0. { 0. } else { x * x.ln() }) 95 | .collect::>(); 96 | 2. * (0..y.len()) 97 | .map(|i| mu[i] - y[i] - y[i] * mu[i].ln() + ylogy[i]) 98 | .sum::() 99 | } 100 | ExponentialFamily::Gamma => { 101 | 2. * (y 102 | .iter() 103 | .zip(mu) 104 | .map(|(yv, muv)| (yv - muv) / (muv) - (yv / muv).ln()) 105 | .sum::()) 106 | } 107 | ExponentialFamily::Exponential => { 108 | 2. 
* (y 109 | .iter() 110 | .zip(mu) 111 | .map(|(yv, muv)| (yv - muv) / (muv) - (yv / muv).ln()) 112 | .sum::()) 113 | } 114 | } 115 | } 116 | 117 | pub fn initial_working_response(&self, y: &[f64]) -> Option { 118 | match self { 119 | ExponentialFamily::Gaussian => Some(Vector::from(y)), 120 | ExponentialFamily::Bernoulli => Some((Vector::from(y) - 0.5) / 0.25), 121 | ExponentialFamily::QuasiPoisson => None, 122 | ExponentialFamily::Poisson => None, 123 | ExponentialFamily::Gamma => None, 124 | ExponentialFamily::Exponential => None, 125 | } 126 | } 127 | pub fn initial_working_weights(&self, y: &[f64]) -> Option { 128 | match self { 129 | ExponentialFamily::Gaussian => Some(Vector::ones(y.len()) / y.len() as f64), 130 | // ExponentialFamily::Gaussian => Some(vrecip(&vec![y.len() as f64; y.len()])), 131 | ExponentialFamily::Bernoulli => Some(0.25 * Vector::ones(y.len()) / y.len() as f64), 132 | ExponentialFamily::QuasiPoisson => None, 133 | ExponentialFamily::Poisson => None, 134 | ExponentialFamily::Gamma => None, 135 | ExponentialFamily::Exponential => None, 136 | } 137 | } 138 | 139 | pub fn penalized_deviance(&self, y: &[f64], mu: &[f64], alpha: f64, coef: &[f64]) -> f64 { 140 | self.deviance(y, mu) + alpha * norm(&coef[1..]) 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/predict/glms/formula.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | #[derive(Debug, Clone)] 4 | pub struct Formula<'a, 'b> { 5 | formula: &'a str, 6 | data: HashMap<&'b str, Vec>, 7 | } 8 | 9 | impl<'a, 'b> Formula<'a, 'b> { 10 | pub fn new(formula: &'a str, data: HashMap<&'b str, Vec>) -> Self { 11 | Self { formula, data } 12 | } 13 | 14 | pub fn parse(&self) -> Result, &'static str> { 15 | assert!(self.formula.contains("~"), "Formula must contain ~."); 16 | todo!() 17 | } 18 | } 19 | 
-------------------------------------------------------------------------------- /src/predict/glms/mod.rs: -------------------------------------------------------------------------------- 1 | //! Implementation of the [generalized linear model](https://en.wikipedia.org/wiki/Generalized_linear_model). 2 | //! Includes [logistic regression](https://en.wikipedia.org/wiki/Logistic_regression), 3 | //! [Poisson regression](https://en.wikipedia.org/wiki/Poisson_regression), and other models. 4 | //! 5 | //! # Resources 6 | //! This code is a translation of [py-glm](https://github.com/madrury/py-glm). 7 | 8 | mod families; 9 | // mod formula; 10 | mod glm; 11 | 12 | pub use families::*; 13 | // pub use formula::*; 14 | pub use glm::*; 15 | -------------------------------------------------------------------------------- /src/predict/gps/kernels.rs: -------------------------------------------------------------------------------- 1 | //! Kernels (covariance functions) for Gaussian processes. See the [Kernel 2 | //! Cookbook](https://www.cs.toronto.edu/~duvenaud/cookbook/) for more details about kernels. 3 | 4 | use crate::linalg::{Dot, Matrix, Vector}; 5 | 6 | pub trait Kernel { 7 | fn forward(&self, x: T, y: T) -> S; 8 | } 9 | 10 | /// The [radial basis function kernel](https://en.wikipedia.org/wiki/Radial_basis_function_kernel). 11 | /// Also called the squared exponential kernel. 12 | pub struct RBFKernel { 13 | /// output variance parameter 14 | var: f64, 15 | /// length scale parameter 16 | length_scale: f64, 17 | } 18 | 19 | /// Type alias for the RBF kernel. 
20 | pub type SquaredExponentialKernel = RBFKernel; 21 | 22 | impl RBFKernel { 23 | pub fn new(var: f64, length_scale: f64) -> Self { 24 | assert!(var > 0., "output variance must be positive"); 25 | assert!(length_scale > 0., "length scale must be positive"); 26 | Self { var, length_scale } 27 | } 28 | } 29 | 30 | /// The [rational quadratic 31 | /// kernel](https://en.wikipedia.org/wiki/Rational_quadratic_covariance_function). 32 | pub struct RationalQuadraticKernel { 33 | /// output variance parameter 34 | var: f64, 35 | /// scale mixture parameter 36 | alpha: f64, 37 | /// length scale of kernel 38 | length_scale: f64, 39 | } 40 | 41 | /// Type alias for the rational quadratic kernel. 42 | pub type RQKernel = RationalQuadraticKernel; 43 | 44 | impl RQKernel { 45 | pub fn new(var: f64, alpha: f64, length_scale: f64) -> Self { 46 | assert!(var > 0., "output variance must be positive"); 47 | assert!(alpha > 0., "scale mixture parameter must be positive"); 48 | assert!(length_scale > 0., "length scale must be positive"); 49 | Self { 50 | var, 51 | alpha, 52 | length_scale, 53 | } 54 | } 55 | } 56 | 57 | // /// Periodic kernel. 58 | // struct PeriodicKernel { 59 | // /// output variance parameter 60 | // var: f64, 61 | // /// period parameter 62 | // p: f64, 63 | // /// length scale parameter 64 | // length_scale: f64, 65 | // } 66 | 67 | macro_rules! impl_kernel_f64_for_rbf { 68 | ($t1: ty) => { 69 | impl Kernel<$t1, f64> for RBFKernel { 70 | fn forward(&self, x: $t1, y: $t1) -> f64 { 71 | (-(x - y).powi(2) / (2. * self.length_scale.powi(2))).exp() * self.var 72 | } 73 | } 74 | }; 75 | } 76 | 77 | impl_kernel_f64_for_rbf!(f64); 78 | impl_kernel_f64_for_rbf!(&f64); 79 | 80 | macro_rules! impl_kernel_f64_for_rq { 81 | ($t1: ty) => { 82 | impl Kernel<$t1, f64> for RationalQuadraticKernel { 83 | fn forward(&self, x: $t1, y: $t1) -> f64 { 84 | (1. + (x - y).powi(2) / (2. 
* self.alpha * self.length_scale.powi(2))) 85 | .powf(self.alpha) 86 | * self.var 87 | } 88 | } 89 | }; 90 | } 91 | 92 | impl_kernel_f64_for_rq!(f64); 93 | impl_kernel_f64_for_rq!(&f64); 94 | 95 | macro_rules! impl_kernel_vec_for_rbf { 96 | ($t1: ty, $t2: ty) => { 97 | impl Kernel<$t1, $t2> for RBFKernel { 98 | fn forward(&self, x: $t1, y: $t1) -> $t2 { 99 | let (x, y) = (x.reshape(-1, 1), y.reshape(-1, 1)); 100 | (-(x.powi(2).reshape(-1, 1) + y.powi(2).reshape(1, -1) - 2. * x.dot_t(y)) 101 | / (2. * self.length_scale.powi(2))) 102 | .exp() 103 | * self.var 104 | } 105 | } 106 | }; 107 | } 108 | 109 | impl_kernel_vec_for_rbf!(Matrix, Matrix); 110 | impl_kernel_vec_for_rbf!(Vector, Matrix); 111 | impl_kernel_vec_for_rbf!(&Matrix, Matrix); 112 | impl_kernel_vec_for_rbf!(&Vector, Matrix); 113 | 114 | macro_rules! impl_kernel_vec_for_rq { 115 | ($t1: ty, $t2: ty) => { 116 | impl Kernel<$t1, $t2> for RationalQuadraticKernel { 117 | fn forward(&self, x: $t1, y: $t1) -> $t2 { 118 | let (x, y) = (x.reshape(-1, 1), y.reshape(-1, 1)); 119 | (1. + (x.powi(2).reshape(-1, 1) + y.powi(2).reshape(1, -1) - 2. * x.dot_t(y)) 120 | / (2. * self.alpha * self.length_scale.powi(2))) 121 | .powf(self.alpha) 122 | * self.var 123 | } 124 | } 125 | }; 126 | } 127 | 128 | impl_kernel_vec_for_rq!(Matrix, Matrix); 129 | impl_kernel_vec_for_rq!(Vector, Matrix); 130 | impl_kernel_vec_for_rq!(&Matrix, Matrix); 131 | impl_kernel_vec_for_rq!(&Vector, Matrix); 132 | -------------------------------------------------------------------------------- /src/predict/gps/mod.rs: -------------------------------------------------------------------------------- 1 | //! Tools for dealing with [Gaussian processes](https://en.wikipedia.org/wiki/Gaussian_process#Usual_covariance_functions). 
2 | 3 | pub mod kernels; 4 | 5 | pub use self::kernels::*; 6 | -------------------------------------------------------------------------------- /src/predict/mod.rs: -------------------------------------------------------------------------------- 1 | //! Various statistical models for data fitting and prediction. 2 | 3 | mod glms; 4 | mod gps; 5 | mod polynomial; 6 | // use crate::optimize::optimizers::Optimizer; 7 | 8 | // /// A predictor for which the parameters can be optimized and updated. 9 | // pub trait Predictor { 10 | // fn update(&mut self, params: &[f64]) -> &mut Self; 11 | // // fn fit_with_optimizer( 12 | // // &mut self, 13 | // // x: &[f64], 14 | // // y: &[f64], 15 | // // optimizer: O, 16 | // // maxsteps: usize, 17 | // // ) -> &mut Self 18 | // // where 19 | // // O: Optimizer; 20 | // fn predict(&self, x: &[f64]) -> Vec; 21 | // } 22 | 23 | pub use self::glms::*; 24 | pub use self::gps::*; 25 | pub use self::polynomial::*; 26 | -------------------------------------------------------------------------------- /src/predict/polynomial.rs: -------------------------------------------------------------------------------- 1 | use crate::linalg::*; 2 | 3 | /// Implements a [polynomial regressor](https://en.wikipedia.org/wiki/Polynomial_regression). 4 | /// 5 | /// In the case of a two coefficients, this reduces to simple linear regression, 6 | /// where the first parameter is the intercept, and the second is the slope. 7 | #[derive(Debug)] 8 | pub struct PolynomialRegressor { 9 | pub coef: Vec, 10 | } 11 | 12 | impl PolynomialRegressor { 13 | /// Create a new polynomial regressor with degree `deg` (e.g., deg = 1 is a linear model). 14 | pub fn new(deg: usize) -> Self { 15 | PolynomialRegressor { 16 | coef: vec![0.; deg + 1], 17 | } 18 | } 19 | 20 | /// Update the coefficients of the polynomial regressor. 
21 | fn update(&mut self, params: &[f64]) -> &mut Self { 22 | self.coef = params.to_owned(); 23 | self 24 | } 25 | 26 | /// Returns `c0 + c[1] * x + c[2] * x^2 ... + cn + x^n`, where `c[i]` are the coefficients of the 27 | /// polynomial regressor, and `x` is some vector of explanatory variables. Evaluation is done 28 | /// using [Horner's method](https://en.wikipedia.org/wiki/Horner%27s_method). 29 | pub fn predict(&self, x: &[f64]) -> Vec { 30 | x.iter() 31 | .map(|val| { 32 | self.coef 33 | .iter() 34 | .rev() 35 | .fold(0., |acc, coeff| acc * val + coeff) 36 | }) 37 | .collect::>() 38 | } 39 | 40 | /// Fit the polynomial regressor to some observed data `y` given some explanatory variables 41 | /// `x`. Uses least squares fitting. 42 | pub fn fit(&mut self, x: &[f64], y: &[f64]) -> &mut Self { 43 | assert_eq!(x.len(), y.len()); 44 | let xv = vandermonde(x, self.coef.len()); 45 | let xtx = xtx(&xv, x.len()); 46 | let xtxinv = invert_matrix(&xtx); 47 | let xty = matmul(&xv, y, x.len(), y.len(), true, false); 48 | let coeffs = matmul( 49 | &xtxinv, 50 | &xty, 51 | self.coef.len(), 52 | self.coef.len(), 53 | false, 54 | false, 55 | ); 56 | self.update(&coeffs) 57 | } 58 | } 59 | 60 | #[cfg(test)] 61 | mod tests { 62 | use super::*; 63 | // use crate::distributions::{Distribution, Normal}; 64 | // use crate::optimize::optimizers::LM; 65 | use approx_eq::assert_approx_eq; 66 | 67 | #[test] 68 | fn test_slr() { 69 | let x = vec![0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]; 70 | let y = vec![5., 7., 9., 11., 13., 15., 17., 19., 21., 23.]; 71 | let mut slr = PolynomialRegressor::new(1); 72 | slr.update(&[5., 2.]); 73 | assert_eq!(slr.predict(&x), y); 74 | slr.update(&[0., 1.]); 75 | assert_eq!(slr.predict(&x), x); 76 | } 77 | 78 | #[test] 79 | fn test_fits() { 80 | let x: Vec = (0..250).into_iter().map(|x| x as f64 / 10.).collect(); 81 | let yv: Vec = (&x).into_iter().map(|v| 5. + 2. 
* v).collect(); 82 | // let scatter = Normal::new(0., 5.); 83 | // let y: Vec = (&yv).into_iter().map(|v| v + scatter.sample()).collect(); 84 | let coeffs = [5., 2.]; 85 | 86 | let mut p = PolynomialRegressor::new(1); 87 | p.fit(&x, &yv); 88 | let coeffs1 = p.coef; 89 | 90 | // p.update(&[2., 2.]); 91 | // let o = LM::default(); 92 | // p.fit_with_optimizer(&x.to_vec(), &y.to_vec(), o, 50); 93 | // let coeffs2 = p.get_coeffs(); 94 | 95 | for i in 0..2 { 96 | assert_approx_eq!(coeffs[i], coeffs1[i], 1e-3); 97 | } 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/prelude.rs: -------------------------------------------------------------------------------- 1 | pub use crate::distributions::*; 2 | pub use crate::functions::*; 3 | pub use crate::integrate::*; 4 | pub use crate::linalg::*; 5 | pub use crate::optimize::*; 6 | pub use crate::predict::*; 7 | pub use crate::statistics::*; 8 | pub use crate::timeseries::*; 9 | pub use crate::validation::*; 10 | -------------------------------------------------------------------------------- /src/statistics/covariance.rs: -------------------------------------------------------------------------------- 1 | use crate::statistics::mean; 2 | 3 | /// Calculates the covariance between two vectors x and y. This is a two-pass algorithm which 4 | /// centers the data before computing the covariance, which improves stability but does not 5 | /// change the result as covariance is invariant with respect to shifts. 6 | pub fn covariance(x: &[f64], y: &[f64]) -> f64 { 7 | assert_eq!(x.len(), y.len()); 8 | let mean_x = mean(x); 9 | let mean_y = mean(y); 10 | let n = x.len(); 11 | 12 | (0..n) 13 | .into_iter() 14 | .map(|i| (x[i] - mean_x) * (y[i] - mean_y)) 15 | .sum::() 16 | / n as f64 17 | } 18 | /// 19 | /// Calculates the sample covariance between two vectors x and y. 
This is a two-pass algorithm which
/// centers the data before computing the covariance, which improves stability but does not
/// change the result as covariance is invariant with respect to shifts.
pub fn sample_covariance(x: &[f64], y: &[f64]) -> f64 {
    assert_eq!(x.len(), y.len());
    let n = x.len();
    // mean = sum / len (same as statistics::mean), computed inline.
    let mean_x = x.iter().sum::<f64>() / n as f64;
    let mean_y = y.iter().sum::<f64>() / n as f64;

    (0..n)
        .into_iter()
        .map(|i| (x[i] - mean_x) * (y[i] - mean_y))
        .sum::<f64>()
        / (n - 1) as f64
}

/// Calculates the sample covariance between two vectors x and y. This is a one-pass algorithm
/// which shifts the data by the first element of each vector before computing the covariance,
/// which improves numerical stability. Because the shift point is not the mean, the cross term
/// `sum(dx) * sum(dy) / n` must be subtracted to recover the true covariance.
pub fn sample_covariance_onepass(x: &[f64], y: &[f64]) -> f64 {
    assert_eq!(x.len(), y.len());
    let n = x.len();
    let mut sxy = 0.;
    let mut sx = 0.;
    let mut sy = 0.;
    for i in 0..n {
        let dx = x[i] - x[0];
        let dy = y[i] - y[0];
        sxy += dx * dy;
        sx += dx;
        sy += dy;
    }
    // Shifted-data formula: Cov = (Σ dx·dy − (Σ dx)(Σ dy) / n) / (n − 1).
    (sxy - sx * sy / n as f64) / (n - 1) as f64
}

/// Calculates the covariance between two vectors x and y. This is a stable one-pass online algorithm.
49 | /// See 50 | pub fn sample_covariance_online(x: &[f64], y: &[f64]) -> f64 { 51 | assert_eq!(x.len(), y.len()); 52 | let mut meanx = 0.; 53 | let mut meany = 0.; 54 | let mut c = 0.; 55 | let mut n = 0.; 56 | 57 | for (i, j) in x.iter().zip(y.iter()) { 58 | n += 1.; 59 | let dx = i - meanx; 60 | let dy = j - meany; 61 | meanx += dx / n; 62 | meany += dy / n; 63 | c += dx * dy; 64 | } 65 | 66 | c / n 67 | } 68 | 69 | #[cfg(test)] 70 | mod tests { 71 | use super::*; 72 | use approx_eq::assert_approx_eq; 73 | 74 | #[test] 75 | fn test_covariance() { 76 | let x1: Vec = vec![-2.1, -1., 4.3]; 77 | let y1: Vec = vec![3., 1.1, 0.12]; 78 | assert_approx_eq!(covariance(&x1, &y1), -2.8573333); 79 | assert_approx_eq!(sample_covariance(&x1, &y1), -4.286); 80 | 81 | let x2: Vec = vec![1.1, 1.7, 2.1, 1.4, 0.2]; 82 | let y2: Vec = vec![3.0, 4.2, 4.9, 4.1, 2.5]; 83 | assert_approx_eq!(covariance(&x2, &y2), 0.532); 84 | assert_approx_eq!(sample_covariance(&x2, &y2), 0.665); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/statistics/hist.rs: -------------------------------------------------------------------------------- 1 | use crate::linalg::Vector; 2 | 3 | // Given the edges of some intervals, return the centers of the intervals. If `edges` is length n, 4 | // then the resulting vector will be length n - 1. 
5 | pub fn hist_bin_centers(edges: &[f64]) -> Vector { 6 | let diff = Vector::from(edges).diff(); 7 | 8 | diff.into_iter() 9 | .scan((edges[0] + edges[1]) / 2., |acc, x| { 10 | let temp = *acc; 11 | *acc += x; 12 | Some(temp) 13 | }) 14 | .collect() 15 | } 16 | 17 | #[cfg(test)] 18 | mod tests { 19 | use super::*; 20 | use crate::distributions::*; 21 | use crate::linalg::*; 22 | 23 | #[test] 24 | fn test_hist_bin_centers() { 25 | for _ in 0..10 { 26 | let lower = DiscreteUniform::new(0, 50).sample(); 27 | let upper = DiscreteUniform::new(50, 100).sample(); 28 | let n = DiscreteUniform::new(5, 500).sample() as usize; 29 | let arr = linspace(lower, upper, n); 30 | let answer = (Vector::from(&arr[1..]) + Vector::from(&arr[..arr.len() - 1])) / 2.; 31 | assert!(Vector::from(hist_bin_centers(&arr)).close_to(&answer, 1e-6)); 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/statistics/mod.rs: -------------------------------------------------------------------------------- 1 | //! A module for computing statistics of data. 2 | 3 | mod covariance; 4 | mod hist; 5 | mod moments; 6 | mod order; 7 | // mod tests; 8 | 9 | pub use self::covariance::*; 10 | pub use self::hist::*; 11 | pub use self::moments::*; 12 | pub use self::order::*; 13 | // pub use self::tests::*; 14 | -------------------------------------------------------------------------------- /src/statistics/moments.rs: -------------------------------------------------------------------------------- 1 | //! A module for computing statistical moments and related values. In particular, this includes 2 | //! means and variances. 3 | 4 | use crate::linalg::sum; 5 | 6 | /// An implementation of Welford's online algorithm, which is used for calculating statistics in a 7 | /// recurrent and stable manner. 8 | /// See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance for the reference 9 | /// implementation of the Welford update algorithm. 
10 | fn welford_update(existing_aggregate: (usize, f64, f64), new_val: &f64) -> (usize, f64, f64) { 11 | // existing aggregate consists of (count, mean, M2) 12 | let (mut count, mut mean, mut m2) = existing_aggregate; 13 | count += 1; 14 | let delta = new_val - mean; 15 | mean += delta / count as f64; 16 | let delta2 = new_val - mean; 17 | m2 += delta * delta2; 18 | (count, mean, m2) 19 | } 20 | 21 | /// Uses the Welford online algorithm to calculate the count, mean, and m2 of an array of data 22 | /// points. This is the driver for the `mean`, `variance`, and `sample_variance` functions. 23 | fn welford_statistics(data: &[f64]) -> (usize, f64, f64) { 24 | let mut aggregate = (0_usize, 0., 0.); 25 | for i in data { 26 | aggregate = welford_update(aggregate, i); 27 | } 28 | aggregate 29 | } 30 | 31 | // /// Calulates the sum of an array of data points. 32 | // pub fn sum(data: &[f64]) -> f64 { 33 | // data.iter().sum::() 34 | // } 35 | 36 | /// Calculates the mean of an array of data points. 37 | pub fn mean(data: &[f64]) -> f64 { 38 | sum(data) / data.len() as f64 39 | } 40 | 41 | /// Calculates the mean of an array of data points using the Welford algorithm. 42 | pub fn welford_mean(data: &[f64]) -> f64 { 43 | let (_, mean, _) = welford_statistics(data); 44 | mean 45 | } 46 | 47 | /// Calculates the population variance from an array of data points in a numerically stable manner 48 | /// using the Welford algorithm. 49 | pub fn var(data: &[f64]) -> f64 { 50 | let (count, _, m2) = welford_statistics(data); 51 | m2 / count as f64 52 | } 53 | 54 | /// Calculates the sample variance from an array of data points in a numerically stable manner 55 | /// using the Welford algorithm. 56 | pub fn sample_var(data: &[f64]) -> f64 { 57 | let (count, _, m2) = welford_statistics(data); 58 | m2 / (count - 1) as f64 59 | } 60 | 61 | /// Calculates the standard deviation of an array of data points. This is the square root of the 62 | /// variance. 
63 | pub fn std(data: &[f64]) -> f64 { 64 | var(data).sqrt() 65 | } 66 | 67 | /// Calculates the sample standard deviation of an array of data points. This is the square root of the 68 | /// sample variance. 69 | pub fn sample_std(data: &[f64]) -> f64 { 70 | sample_var(data).sqrt() 71 | } 72 | 73 | #[cfg(test)] 74 | mod tests { 75 | use super::*; 76 | use approx_eq::assert_approx_eq; 77 | 78 | #[test] 79 | fn test_mean() { 80 | let data1: Vec = vec![ 81 | -0.2711336, 82 | 1.20002575, 83 | 0.69102151, 84 | -0.56390913, 85 | -1.62661382, 86 | -0.0613969, 87 | 0.39876752, 88 | -0.99619281, 89 | 1.12860854, 90 | -0.61163405, 91 | ]; 92 | assert_approx_eq!(mean(&data1), -0.071245699); 93 | 94 | let data2: Vec = vec![ 95 | -1.35521905, 96 | 0.70316493, 97 | -0.24386284, 98 | 0.20382644, 99 | 1.28818114, 100 | -0.90003795, 101 | -0.73912347, 102 | 1.48550753, 103 | 1.02038191, 104 | 0.18684426, 105 | ]; 106 | assert_approx_eq!(mean(&data2), 0.16496629); 107 | } 108 | #[test] 109 | fn test_welford_mean() { 110 | let data1: Vec = vec![ 111 | -0.2711336, 112 | 1.20002575, 113 | 0.69102151, 114 | -0.56390913, 115 | -1.62661382, 116 | -0.0613969, 117 | 0.39876752, 118 | -0.99619281, 119 | 1.12860854, 120 | -0.61163405, 121 | ]; 122 | assert_approx_eq!(welford_mean(&data1), -0.071245699); 123 | 124 | let data2: Vec = vec![ 125 | -1.35521905, 126 | 0.70316493, 127 | -0.24386284, 128 | 0.20382644, 129 | 1.28818114, 130 | -0.90003795, 131 | -0.73912347, 132 | 1.48550753, 133 | 1.02038191, 134 | 0.18684426, 135 | ]; 136 | assert_approx_eq!(welford_mean(&data2), 0.16496629); 137 | } 138 | #[test] 139 | fn test_var() { 140 | let data1: Vec = vec![ 141 | -0.2711336, 142 | 1.20002575, 143 | 0.69102151, 144 | -0.56390913, 145 | -1.62661382, 146 | -0.0613969, 147 | 0.39876752, 148 | -0.99619281, 149 | 1.12860854, 150 | -0.61163405, 151 | ]; 152 | assert_approx_eq!(var(&data1), 0.7707231173572182); 153 | 154 | let data2: Vec = vec![ 155 | -1.35521905, 156 | 0.70316493, 157 | -0.24386284, 
158 | 0.20382644, 159 | 1.28818114, 160 | -0.90003795, 161 | -0.73912347, 162 | 1.48550753, 163 | 1.02038191, 164 | 0.18684426, 165 | ]; 166 | assert_approx_eq!(var(&data2), 0.8458540238604941); 167 | } 168 | #[test] 169 | fn test_sample_var() { 170 | let data1: Vec = vec![ 171 | -0.2711336, 172 | 1.20002575, 173 | 0.69102151, 174 | -0.56390913, 175 | -1.62661382, 176 | -0.0613969, 177 | 0.39876752, 178 | -0.99619281, 179 | 1.12860854, 180 | -0.61163405, 181 | ]; 182 | assert_approx_eq!(sample_var(&data1), 0.8563590181955176); 183 | 184 | let data2: Vec = vec![ 185 | -1.35521905, 186 | 0.70316493, 187 | -0.24386284, 188 | 0.20382644, 189 | 1.28818114, 190 | -0.90003795, 191 | -0.73912347, 192 | 1.48550753, 193 | 1.02038191, 194 | 0.18684426, 195 | ]; 196 | assert_approx_eq!(sample_var(&data2), 0.939837803612305); 197 | } 198 | #[test] 199 | fn test_std() { 200 | let data1: Vec = vec![ 201 | -0.2711336, 202 | 1.20002575, 203 | 0.69102151, 204 | -0.56390913, 205 | -1.62661382, 206 | -0.0613969, 207 | 0.39876752, 208 | -0.99619281, 209 | 1.12860854, 210 | -0.61163405, 211 | ]; 212 | assert_approx_eq!(std(&data1), 0.8779083758433825); 213 | 214 | let data2: Vec = vec![ 215 | -1.35521905, 216 | 0.70316493, 217 | -0.24386284, 218 | 0.20382644, 219 | 1.28818114, 220 | -0.90003795, 221 | -0.73912347, 222 | 1.48550753, 223 | 1.02038191, 224 | 0.18684426, 225 | ]; 226 | assert_approx_eq!(std(&data2), 0.9197032256391593); 227 | } 228 | } 229 | -------------------------------------------------------------------------------- /src/statistics/order.rs: -------------------------------------------------------------------------------- 1 | //! A module for computing order statistics. This includes medians, quantiles, and extrema. 2 | 3 | /// Returns the smallest element in the array. 4 | pub fn min(data: &[f64]) -> f64 { 5 | data.iter().fold(f64::NAN, |acc, i| f64::min(acc, *i)) 6 | } 7 | 8 | /// Returns the largest element in the array. 
9 | pub fn max(data: &[f64]) -> f64 { 10 | data.iter().fold(f64::NAN, |acc, i| f64::max(acc, *i)) 11 | } 12 | 13 | /// Returns the index of the smallest element in the array. 14 | pub fn argmin(data: &[f64]) -> usize { 15 | data.iter() 16 | .enumerate() 17 | .fold( 18 | (0, f64::MAX), 19 | |acc, (i, j)| if acc.1 > *j { (i, *j) } else { acc }, 20 | ) 21 | .0 22 | } 23 | 24 | /// Returns the index of the largest element in the array. 25 | pub fn argmax(data: &[f64]) -> usize { 26 | data.iter() 27 | .enumerate() 28 | .fold( 29 | (0, f64::MIN), 30 | |acc, (i, j)| if acc.1 < *j { (i, *j) } else { acc }, 31 | ) 32 | .0 33 | } 34 | 35 | #[cfg(test)] 36 | mod tests { 37 | use super::*; 38 | use approx_eq::assert_approx_eq; 39 | 40 | #[test] 41 | fn test_min() { 42 | let data1: Vec = vec![ 43 | -0.2711336, 44 | 1.20002575, 45 | 0.69102151, 46 | -0.56390913, 47 | -1.62661382, 48 | -0.0613969, 49 | 0.39876752, 50 | -0.99619281, 51 | 1.12860854, 52 | -0.61163405, 53 | ]; 54 | assert_approx_eq!(min(&data1), -1.62661382); 55 | 56 | let data2: Vec = vec![ 57 | -1.35521905, 58 | 0.70316493, 59 | -0.24386284, 60 | 0.20382644, 61 | 1.28818114, 62 | -0.90003795, 63 | -0.73912347, 64 | 1.48550753, 65 | 1.02038191, 66 | 0.18684426, 67 | ]; 68 | assert_approx_eq!(min(&data2), -1.35521905); 69 | } 70 | 71 | #[test] 72 | fn test_argmin() { 73 | let data1: Vec = vec![ 74 | -0.2711336, 75 | 1.20002575, 76 | 0.69102151, 77 | -0.56390913, 78 | -1.62661382, 79 | -0.0613969, 80 | 0.39876752, 81 | -0.99619281, 82 | 1.12860854, 83 | -0.61163405, 84 | ]; 85 | assert_eq!(argmin(&data1), 4); 86 | 87 | let data2: Vec = vec![ 88 | -1.35521905, 89 | 0.70316493, 90 | -0.24386284, 91 | 0.20382644, 92 | 1.28818114, 93 | -0.90003795, 94 | -0.73912347, 95 | 1.48550753, 96 | 1.02038191, 97 | 0.18684426, 98 | ]; 99 | assert_eq!(argmin(&data2), 0); 100 | } 101 | 102 | #[test] 103 | fn test_max() { 104 | let data1: Vec = vec![ 105 | -0.2711336, 106 | 1.20002575, 107 | 0.69102151, 108 | -0.56390913, 109 | 
-1.62661382, 110 | -0.0613969, 111 | 0.39876752, 112 | -0.99619281, 113 | 1.12860854, 114 | -0.61163405, 115 | ]; 116 | assert_approx_eq!(max(&data1), 1.20002575); 117 | 118 | let data2: Vec = vec![ 119 | -1.35521905, 120 | 0.70316493, 121 | -0.24386284, 122 | 0.20382644, 123 | 1.28818114, 124 | -0.90003795, 125 | -0.73912347, 126 | 1.48550753, 127 | 1.02038191, 128 | 0.18684426, 129 | ]; 130 | assert_approx_eq!(max(&data2), 1.48550753); 131 | } 132 | 133 | #[test] 134 | fn test_argmax() { 135 | let data1: Vec = vec![ 136 | -0.2711336, 137 | 1.20002575, 138 | 0.69102151, 139 | -0.56390913, 140 | -1.62661382, 141 | -0.0613969, 142 | 0.39876752, 143 | -0.99619281, 144 | 1.12860854, 145 | -0.61163405, 146 | ]; 147 | assert_eq!(argmax(&data1), 1); 148 | 149 | let data2: Vec = vec![ 150 | -1.35521905, 151 | 0.70316493, 152 | -0.24386284, 153 | 0.20382644, 154 | 1.28818114, 155 | -0.90003795, 156 | -0.73912347, 157 | 1.48550753, 158 | 1.02038191, 159 | 0.18684426, 160 | ]; 161 | assert_eq!(argmax(&data2), 7); 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /src/statistics/tests.rs: -------------------------------------------------------------------------------- 1 | /// Calculates the t-test for the mean of one set of data. It tests for the null hypothesis that 2 | /// the mean of a sample of independent observations `data` is equal to the population mean `mu`. 3 | /// It returns the t statistic and the two-sided p-value. 4 | pub fn ttest_1s(data: &[f64], mu: f64) -> (f64, f64) { 5 | unimplemented!(); 6 | } 7 | 8 | /// Calculates the Student's t-test for two independent samples, assuming equal variance. This 9 | /// is less reliable than Welch's t-test. See . 10 | pub fn ttest_2s_student(x: &[f64], y: &[f64]) -> (f64, f64) { 11 | unimplemented!(); 12 | } 13 | 14 | /// Calculates Welch's t-test for two independent samples, without assuming equal variance. 
It tests 15 | /// the hypothesis that the two populations have equal means. This is more reliable when the two 16 | /// samples have unequal variances and/or unequal sample sizes. See 17 | /// . 18 | pub fn ttest_2s_welch(x: &[f64], y: &[f64]) -> (f64, f64) { 19 | unimplemented!(); 20 | } 21 | -------------------------------------------------------------------------------- /src/timeseries/autoregressive.rs: -------------------------------------------------------------------------------- 1 | use super::acf; 2 | use crate::linalg::*; 3 | use crate::statistics::mean; 4 | use std::fmt::{Display, Formatter, Result}; 5 | 6 | /// Implements an [autoregressive models](https://en.wikipedia.org/wiki/Autoregressive_model). 7 | #[derive(Debug)] 8 | pub struct AR { 9 | pub p: usize, 10 | pub coeffs: Vec, 11 | pub intercept: f64, 12 | } 13 | 14 | /// Create a new [autoregressive model](https://en.wikipedia.org/wiki/Autoregressive_model) of 15 | /// order p. 16 | impl AR { 17 | pub fn new(p: usize) -> Self { 18 | assert!(p > 0, "p must be greater than 0"); 19 | AR { 20 | p, 21 | coeffs: vec![0.; p], 22 | intercept: 0., 23 | } 24 | } 25 | 26 | /// Fit the AR(p) model to the data using the Yule-Walker equations. 27 | pub fn fit(&mut self, data: &[f64]) -> &mut Self { 28 | self.intercept = mean(data); 29 | let adjusted = data 30 | .iter() 31 | .map(|x| x - self.intercept) 32 | .collect::>(); 33 | let autocorrelations: Vec = (0..=self.p).map(|t| acf(&adjusted, t as i32)).collect(); 34 | let r = &autocorrelations[1..]; 35 | let n = r.len(); 36 | let r_matrix = invert_matrix(&toeplitz(&autocorrelations[..n])); 37 | let coeffs = matmul(&r_matrix, r, n, n, false, false); 38 | self.coeffs = coeffs; 39 | self.coeffs.reverse(); 40 | self 41 | } 42 | 43 | /// Given some data, predict the value for a single timestep ahead. 
44 | pub fn predict_one(&self, data: &[f64]) -> f64 { 45 | let n = data.len(); 46 | let coeff_len = self.coeffs.len(); 47 | if n >= coeff_len { 48 | dot(&data[n - coeff_len..], &self.coeffs) 49 | } else { 50 | // maybe panic instead? or return NA 51 | // return std::f64::NAN; 52 | dot(data, &self.coeffs[..n]) 53 | } 54 | } 55 | 56 | /// Predict n values ahead. For forecasts after the first forecast, uses previous forecasts as 57 | /// "data" to create subsequent forecasts. 58 | pub fn predict(&self, data: &[f64], n: usize) -> Vec { 59 | let forecasts = vec![0.; n]; 60 | let mut d: Vec = data[data.len() - self.coeffs.len()..].to_vec(); 61 | d.extend(forecasts); 62 | for i in self.coeffs.len()..d.len() { 63 | d[i] = self.predict_one(&d[..i]); 64 | } 65 | d[d.len() - n..] 66 | .to_vec() 67 | .iter() 68 | .map(|x| x + self.intercept) 69 | .collect() 70 | } 71 | } 72 | 73 | impl Display for AR { 74 | fn fmt(&self, f: &mut Formatter<'_>) -> Result { 75 | writeln!(f, "AR({}) model", self.p)?; 76 | for (p, coeff) in self.coeffs.iter().rev().enumerate() { 77 | writeln!(f, "p{:.4} = {:.4}", p + 1, coeff)?; 78 | } 79 | writeln!(f, "intercept = {:.4}", self.intercept)?; 80 | Ok(()) 81 | } 82 | } 83 | 84 | #[cfg(test)] 85 | mod tests { 86 | use super::*; 87 | use crate::statistics::mean; 88 | use approx_eq::{assert_approx_eq, rel_diff}; 89 | 90 | #[test] 91 | fn test_ar_model() { 92 | let data = vec![ 93 | 0.21038, 94 | 1.131721, 95 | 3.28641, 96 | 2.338077, 97 | 1.499455, 98 | 1.19406, 99 | -0.6015611, 100 | -1.287033, 101 | -3.051659, 102 | -2.630405, 103 | 0.1041386, 104 | 2.933628, 105 | 3.872648, 106 | 3.519838, 107 | 1.81834, 108 | -1.454362, 109 | -2.431581, 110 | -3.986453, 111 | -3.122605, 112 | -1.141113, 113 | -0.07377645, 114 | -0.5474213, 115 | 0.2350843, 116 | -1.247623, 117 | -1.788729, 118 | -0.1836658, 119 | -0.6114766, 120 | -0.0003512522, 121 | 1.27916, 122 | -0.2754683, 123 | -1.792122, 124 | -0.1902297, 125 | -1.64871, 126 | -1.227125, 127 | -1.666066, 128 
| -2.217532, 129 | 0.3182005, 130 | 0.839974, 131 | 1.883632, 132 | 2.562701, 133 | 2.064571, 134 | 1.347031, 135 | 0.5822702, 136 | -0.2100001, 137 | -0.9831178, 138 | -2.022402, 139 | -0.2950079, 140 | 2.435764, 141 | 0.1554406, 142 | 1.180818, 143 | 0.9291775, 144 | -1.096983, 145 | -0.3009598, 146 | 1.009731, 147 | -1.003446, 148 | -1.346068, 149 | 0.6554112, 150 | 0.3273469, 151 | 0.0252534, 152 | 0.1289094, 153 | 0.4402104, 154 | -1.071554, 155 | -1.768173, 156 | -0.01722473, 157 | -1.309611, 158 | -1.140079, 159 | 1.76984, 160 | 1.784674, 161 | 1.269765, 162 | 0.4825738, 163 | -1.461408, 164 | -1.727341, 165 | -1.477258, 166 | 1.036593, 167 | 1.520819, 168 | 0.2923091, 169 | 0.7511532, 170 | 1.356483, 171 | -1.149694, 172 | -3.703727, 173 | -2.837313, 174 | -2.164919, 175 | -0.9490226, 176 | 1.258048, 177 | 4.173029, 178 | 5.098197, 179 | 3.297466, 180 | 1.711004, 181 | 0.5347419, 182 | -2.626136, 183 | -3.520617, 184 | -2.993732, 185 | -1.993039, 186 | -1.283884, 187 | 2.713336, 188 | 3.42282, 189 | 2.94359, 190 | 2.0757, 191 | 0.13544, 192 | -2.641659, 193 | ]; 194 | 195 | let mut ar = AR::new(4); 196 | ar.fit(&data); 197 | 198 | let coeffs: Vec = ar.coeffs.iter().rev().copied().collect(); 199 | let coeffs_from_r = vec![0.7976, -0.3638, 0.2437, -0.4929]; 200 | 201 | for i in 0..4 { 202 | assert!( 203 | rel_diff(coeffs[i], coeffs_from_r[i]) < 0.05 204 | || (coeffs[i] - coeffs_from_r[i]).abs() < 0.1, 205 | "{} {}", 206 | coeffs[i], 207 | coeffs_from_r[i] 208 | ); 209 | } 210 | 211 | let pred = ar.predict(&data, 25); 212 | let pred_from_r = vec![ 213 | -3.085083, 214 | -2.474237, 215 | -1.547886, 216 | 0.2270113, 217 | 1.67267, 218 | 2.105194, 219 | 1.900999, 220 | 1.059832, 221 | -0.1426515, 222 | -1.05828, 223 | -1.455626, 224 | -1.318618, 225 | -0.6962956, 226 | 0.1038774, 227 | 0.7445638, 228 | 1.048708, 229 | 0.9470198, 230 | 0.51771, 231 | -0.02880782, 232 | -0.482939, 233 | -0.7009118, 234 | -0.6315113, 235 | -0.3386402, 236 | 0.04001635, 237 | 
0.3596356, 238 | ]; 239 | 240 | for i in 0..25 { 241 | assert!( 242 | rel_diff(pred[i], pred_from_r[i]) < 0.05 || (pred[i] - pred_from_r[i]).abs() < 0.1, 243 | "{} {}", 244 | pred[i], 245 | pred_from_r[i] 246 | ); 247 | } 248 | 249 | let far_pred = ar.predict(&data, 1000).pop().unwrap(); 250 | assert_approx_eq!(far_pred, mean(&data), 1e-4); 251 | } 252 | } 253 | -------------------------------------------------------------------------------- /src/timeseries/functions.rs: -------------------------------------------------------------------------------- 1 | use crate::statistics::mean; 2 | 3 | /// Calculates the autocovariance of lag (-)k of a vector of time series data, 4 | /// assuming that the points are equally spaced in time. 5 | pub fn acovf(ts: &[f64], k: i32) -> f64 { 6 | let n = ts.len(); 7 | let ts_mean = mean(ts); 8 | 1. / n as f64 9 | * (k.abs() as usize..n) 10 | .into_iter() 11 | .map(|i| (ts[i] - ts_mean) * (ts[i - k.abs() as usize] - ts_mean)) 12 | .sum::() 13 | } 14 | 15 | /// Calculates the autocorrelation of lag (-)k of a vector of time series data, 16 | /// assuming that the points are equally spaced in time. 17 | pub fn acf(ts: &[f64], k: i32) -> f64 { 18 | let n = ts.len(); 19 | let ts_mean = mean(ts); 20 | let numerator: f64 = 1. / n as f64 21 | * (k.abs() as usize..n) 22 | .into_iter() 23 | .map(|i| (ts[i] - ts_mean) * (ts[i - k.abs() as usize] - ts_mean)) 24 | .sum::(); 25 | let denominator: f64 = (0..n) 26 | .into_iter() 27 | .map(|i| (ts[i] - ts_mean).powi(2)) 28 | .sum::() 29 | / n as f64; 30 | numerator / denominator 31 | } 32 | 33 | /// Applies a single differencing operation to a vector. Note that the length of the vector is shortened by 34 | /// one. 
35 | pub fn difference(v: Vec) -> Vec { 36 | (0..v.len() - 1).map(|i| v[i + 1] - v[i]).collect() 37 | } 38 | 39 | #[cfg(test)] 40 | mod tests { 41 | use super::*; 42 | use approx_eq::assert_approx_eq; 43 | 44 | #[test] 45 | fn test_acf_acovf() { 46 | let data: Vec = vec![ 47 | -213.0, -564.0, -35.0, -15.0, 141.0, 115.0, -420.0, -360.0, 203.0, -338.0, -431.0, 48 | 194.0, -220.0, -513.0, 154.0, -125.0, -559.0, 92.0, -21.0, -579.0, -52.0, 99.0, -543.0, 49 | -175.0, 162.0, -457.0, -346.0, 204.0, -300.0, -474.0, 164.0, -107.0, -572.0, -8.0, 50 | 83.0, -541.0, -224.0, 180.0, -420.0, -374.0, 201.0, -236.0, -531.0, 83.0, 27.0, -564.0, 51 | -112.0, 131.0, -507.0, -254.0, 199.0, -311.0, -495.0, 143.0, -46.0, -579.0, -90.0, 52 | 136.0, -472.0, -338.0, 202.0, -287.0, -477.0, 169.0, -124.0, -568.0, 17.0, 48.0, 53 | -568.0, -135.0, 162.0, -430.0, -422.0, 172.0, -74.0, -577.0, -13.0, 92.0, -534.0, 54 | -243.0, 194.0, -355.0, -465.0, 156.0, -81.0, -578.0, -64.0, 139.0, -449.0, -384.0, 55 | 193.0, -198.0, -538.0, 110.0, -44.0, -577.0, -6.0, 66.0, -552.0, -164.0, 161.0, -460.0, 56 | -344.0, 205.0, -281.0, -504.0, 134.0, -28.0, -576.0, -118.0, 156.0, -437.0, -381.0, 57 | 200.0, -220.0, -540.0, 83.0, 11.0, -568.0, -160.0, 172.0, -414.0, -408.0, 188.0, 58 | -125.0, -572.0, -32.0, 139.0, -492.0, -321.0, 205.0, -262.0, -504.0, 142.0, -83.0, 59 | -574.0, 0.0, 48.0, -571.0, -106.0, 137.0, -501.0, -266.0, 190.0, -391.0, -406.0, 194.0, 60 | -186.0, -553.0, 83.0, -13.0, -577.0, -49.0, 103.0, -515.0, -280.0, 201.0, 300.0, 61 | -506.0, 131.0, -45.0, -578.0, -80.0, 138.0, -462.0, -361.0, 201.0, -211.0, -554.0, 62 | 32.0, 74.0, -533.0, -235.0, 187.0, -372.0, -442.0, 182.0, -147.0, -566.0, 25.0, 68.0, 63 | -535.0, -244.0, 194.0, -351.0, -463.0, 174.0, -125.0, -570.0, 15.0, 72.0, -550.0, 64 | -190.0, 172.0, -424.0, -385.0, 198.0, -218.0, -536.0, 96.0, 65 | ]; 66 | 67 | let autocorrelations: Vec = vec![ 68 | 1.00000000e+00, 69 | -3.07304801e-01, 70 | -7.40350266e-01, 71 | 7.74689225e-01, 72 | 
2.05155438e-01, 73 | -8.98156108e-01, 74 | 3.76063789e-01, 75 | 6.32846512e-01, 76 | -7.69256070e-01, 77 | -1.24870822e-01, 78 | 8.24513136e-01, 79 | -4.00433460e-01, 80 | -5.46316738e-01, 81 | 7.31525640e-01, 82 | 7.13826843e-02, 83 | -7.56326788e-01, 84 | 4.02091425e-01, 85 | 4.81864943e-01, 86 | -6.95651379e-01, 87 | -3.32642962e-02, 88 | 7.00509687e-01, 89 | -4.09821551e-01, 90 | -4.29661465e-01, 91 | 6.70177451e-01, 92 | -6.32534486e-04, 93 | -6.58335333e-01, 94 | 4.15408913e-01, 95 | 3.88528035e-01, 96 | -6.54277389e-01, 97 | 2.61988532e-02, 98 | 6.29088292e-01, 99 | -4.21577121e-01, 100 | -3.56941756e-01, 101 | 6.40623681e-01, 102 | -5.24300195e-02, 103 | -6.01019436e-01, 104 | 4.33174526e-01, 105 | 3.21692640e-01, 106 | -6.36287268e-01, 107 | 8.40102338e-02, 108 | 5.76432704e-01, 109 | -4.49528938e-01, 110 | -2.82910789e-01, 111 | 6.17116225e-01, 112 | -1.04847960e-01, 113 | -5.45169888e-01, 114 | 4.53570519e-01, 115 | 2.50418060e-01, 116 | -6.07894239e-01, 117 | 1.39930070e-01, 118 | ]; 119 | 120 | for i in 0..50 { 121 | assert_approx_eq!(acf(&data, i), autocorrelations[i as usize]); 122 | assert_approx_eq!(acovf(&data, i) / acovf(&data, 0), acf(&data, i)); 123 | assert!(acovf(&data, 0) >= acovf(&data, i).abs()); 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /src/timeseries/mod.rs: -------------------------------------------------------------------------------- 1 | //! Models and functions for time series analysis. 2 | 3 | mod autoregressive; 4 | mod functions; 5 | 6 | pub use autoregressive::*; 7 | pub use functions::*; 8 | -------------------------------------------------------------------------------- /src/validation/mod.rs: -------------------------------------------------------------------------------- 1 | //! Methods for validating models. 
2 | 3 | mod resample; 4 | 5 | pub use resample::*; 6 | -------------------------------------------------------------------------------- /src/validation/resample.rs: -------------------------------------------------------------------------------- 1 | //! Algorithms for data resampling. 2 | 3 | use crate::distributions::{DiscreteUniform, Distribution, Distribution1D}; 4 | 5 | /// Given an array of data, returns `n_bootstrap` vectors, where each has elements that are drawn 6 | /// from the original array with replacement, and the length of each vector is the same as the 7 | /// length of the original array. 8 | pub fn bootstrap(data: &[f64], n_bootstrap: usize) -> Vec> { 9 | let mut resamples: Vec> = Vec::with_capacity(n_bootstrap); 10 | 11 | let resamp_gen = DiscreteUniform::new(0, (data.len() - 1) as i64); 12 | 13 | for _ in 0..n_bootstrap { 14 | let idxs = resamp_gen.sample_n(data.len()); 15 | resamples.push(idxs.into_iter().map(|i| data[i as usize]).collect()); 16 | } 17 | 18 | resamples 19 | } 20 | 21 | /// Given a length-n array of data, returns all leave-one-out length n-1 vectors. See 22 | /// 23 | pub fn jackknife(data: &[f64]) -> Vec> { 24 | let n = data.len(); 25 | let mut resamples: Vec> = Vec::with_capacity(n); 26 | for i in 0..n { 27 | let (front, back) = data.split_at(i); 28 | let (_, rest) = back.split_first().unwrap(); 29 | let mut v = front.to_vec(); 30 | v.extend_from_slice(rest); 31 | resamples.push(v); 32 | } 33 | resamples 34 | } 35 | 36 | /// Shuffle an array. 37 | pub fn shuffle(data: &[f64]) -> Vec { 38 | let mut shuf = data.to_vec(); 39 | let randomizer = DiscreteUniform::new(0, data.len() as i64 - 1); 40 | for _ in 0..shuf.len() * 2 { 41 | let (a, b) = (randomizer.sample(), randomizer.sample()); 42 | shuf.swap(a as usize, b as usize); 43 | } 44 | shuf 45 | } 46 | 47 | /// Shuffle two arrays. The same shuffling is applied to both arrays. That is, a pair (x_i, 48 | /// y_i) will still be paired together as (x_j, y_j) after shuffling. 
49 | pub fn shuffle_two(arr1: &[f64], arr2: &[f64]) -> (Vec, Vec) { 50 | assert_eq!(arr1.len(), arr2.len()); 51 | let (mut shuf1, mut shuf2) = (arr1.to_vec(), arr2.to_vec()); 52 | let randomizer = DiscreteUniform::new(0, arr1.len() as i64 - 1); 53 | for _ in 0..arr1.len() * 2 { 54 | let (a, b) = (randomizer.sample(), randomizer.sample()); 55 | shuf1.swap(a as usize, b as usize); 56 | shuf2.swap(a as usize, b as usize); 57 | } 58 | (shuf1, shuf2) 59 | } 60 | 61 | #[cfg(test)] 62 | mod tests { 63 | use super::*; 64 | use crate::distributions::Normal; 65 | use crate::statistics::{max, mean, min, std}; 66 | use approx_eq::assert_approx_eq; 67 | 68 | #[test] 69 | fn test_bootstrap_range() { 70 | let x: Vec = (0..25).into_iter().map(|x| x as f64).collect(); 71 | let samps = bootstrap(&x, 50); 72 | for i in samps { 73 | assert!(min(&i) >= min(&x)); 74 | assert!(max(&i) <= max(&x)) 75 | } 76 | } 77 | 78 | #[test] 79 | fn test_bootstrap_moments() { 80 | let x = Normal::new(4., 2.).sample_n(1e3 as usize); 81 | let samps = bootstrap(&x, 50); 82 | let means: Vec = (&samps).into_iter().map(|samp| mean(samp)).collect(); 83 | let stds: Vec = (&samps).into_iter().map(|samp| std(samp)).collect(); 84 | 85 | // check that the sampling mean and sampling std agree with 86 | // the "true" mean and std within 2.5% 87 | assert_approx_eq!(mean(&x), mean(&means), 0.025); 88 | assert_approx_eq!(std(&x), mean(&stds), 0.025); 89 | } 90 | 91 | #[test] 92 | fn test_jackknife_size() { 93 | let x = Normal::default().sample_n(50); 94 | let jk_samples = jackknife(&x); 95 | for s in jk_samples { 96 | assert_eq!(s.len(), 49); 97 | } 98 | } 99 | 100 | #[test] 101 | fn test_jackknife_mean() { 102 | let x = Normal::default().sample_n(100); 103 | let jk_samples = jackknife(&x); 104 | let jk_means = jk_samples.iter().map(|s| mean(s)).collect::>(); 105 | assert_approx_eq!(mean(&x), mean(&jk_means)); 106 | } 107 | } 108 | --------------------------------------------------------------------------------