├── .github └── workflows │ ├── release.yml │ └── test.yml ├── .gitignore ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md ├── benches └── criterion.rs ├── examples └── testsuite.rs ├── images ├── DotK.svg ├── Kahan.svg ├── NaiveDot.svg ├── NaiveSum.svg ├── OnlineExactDot.svg ├── OnlineExactSum.svg └── SumK.svg ├── src ├── dot.rs ├── dot │ ├── dotk.rs │ ├── naive.rs │ ├── onlineexactdot.rs │ └── traits.rs ├── lib.rs ├── sum.rs ├── sum │ ├── cascaded.rs │ ├── ifastsum.rs │ ├── kahan.rs │ ├── naive.rs │ ├── onlineexactsum.rs │ ├── sumk.rs │ └── traits.rs ├── util.rs └── util │ └── traits.rs └── tests └── all.rs /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | push: 4 | tags: 5 | - "*.*.*" 6 | 7 | jobs: 8 | test: 9 | name: Test suite 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | - uses: dtolnay/rust-toolchain@stable 14 | - run: cargo package -v 15 | - uses: softprops/action-gh-release@v2 16 | with: 17 | files: target/package/accurate-${{ github.ref_name }}.crate 18 | - uses: katyo/publish-crates@v2 19 | with: 20 | registry-token: ${{ secrets.CRATES_IO_TOKEN }} 21 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | on: 3 | - push 4 | - pull_request 5 | 6 | jobs: 7 | test: 8 | name: Test suite 9 | runs-on: ubuntu-latest 10 | continue-on-error: ${{ matrix.allow-errors }} 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | toolchain-version: 15 | - stable 16 | - beta 17 | allow-errors: [false] 18 | include: 19 | - toolchain-version: nightly 20 | allow-errors: true 21 | steps: 22 | - uses: actions/checkout@v4 23 | - uses: dtolnay/rust-toolchain@master 24 | with: 25 | toolchain: ${{ matrix.toolchain-version }} 26 | components: clippy, rustfmt 27 | - run: cargo build -v 
--no-default-features 28 | - run: cargo build -v 29 | - run: cargo test -v 30 | - run: cargo clippy -v 31 | - run: cargo fmt -v --check 32 | - run: cargo package -v 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "accurate" 3 | version = "0.4.1" 4 | edition = "2021" 5 | authors = ["Benedikt Steinbusch "] 6 | description = "(more or less) accurate floating point algorithms" 7 | homepage = "https://github.com/bsteinb/accurate" 8 | repository = "https://github.com/bsteinb/accurate" 9 | documentation = "http://docs.rs/accurate/" 10 | readme = "README.md" 11 | keywords = [ "floating-point", "accurate", "sum", "dot-product" ] 12 | categories = [ "algorithms", "mathematics" ] 13 | license = "MIT OR Apache-2.0" 14 | exclude = [ "/.github" ] 15 | 16 | [features] 17 | default = [ "parallel" ] 18 | fma = [] 19 | parallel = [ "rayon" ] 20 | clippy = [] 21 | lint = [ "clippy" ] 22 | 23 | [dependencies] 24 | cfg-if = "1" 25 | embed-doc-image = "0.1" 26 | ieee754 = "0.2" 27 | num-traits = "0.2" 28 | rayon = { version = "1", optional = true } 29 | 30 | [dev-dependencies] 31 | criterion = "0.5" 32 | criterion-plot = "0.5" 33 | doc-comment = "0.3" 34 | num = "0.4" 35 | rand = "0.8" 36 | rug = "1.0" 37 | 38 | [[bench]] 39 | name = "criterion" 40 | harness = false 41 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | 203 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Benedikt Steinbusch 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # (More or less) accurate floating point algorithms 2 | 3 | [![GitHub Actions status][gh-actions-shield]][gh-actions] [![Documentation: hosted][doc-shield]][doc] [![License: Apache License 2.0 or MIT][license-shield]][license] [![latest GitHub release][release-shield]][release] [![crate on crates.io][crate-shield]][crate] 4 | 5 | This crate provides several algorithms that implement highly accurate or even guaranteed correct 6 | sum and dot product for floating-point numbers without resorting to arbitrary precision arithmetic. 7 | These algorithms are available: 8 | 9 | - Kahan summation, based on [Kahan 65](https://doi.org/10.1145%2F363707.363723) 10 | - Neumaier summation, based on [Neumaier 74](https://doi.org/10.1002%2Fzamm.19740540106) 11 | - Klein summation, based on [Klein 06](https://doi.org/10.1007%2Fs00607-005-0139-x) 12 | - Accurate sum and dot product, based on [Ogita, Rump, and Oishi 05](http://dx.doi.org/10.1137/030601818) 13 | - Online exact summation, based on [Zhu and Hayes 10](http://dx.doi.org/10.1145/1824801.1824815) 14 | 15 | [gh-actions-shield]: https://img.shields.io/github/actions/workflow/status/bsteinb/accurate/test.yml?branch=master&style=flat-square 16 | [gh-actions]: https://github.com/bsteinb/accurate/actions 17 | [doc-shield]: https://img.shields.io/badge/documentation-docs.rs-blue.svg?style=flat-square 18 | [doc]: https://docs.rs/accurate/ 19 | [license-shield]: https://img.shields.io/badge/license-Apache_License_2.0_or_MIT-blue.svg?style=flat-square 20 | [license]: 
https://github.com/bsteinb/accurate#license 21 | [release-shield]: https://img.shields.io/github/release/bsteinb/accurate.svg?style=flat-square 22 | [release]: https://github.com/bsteinb/accurate/releases/latest 23 | [crate-shield]: https://img.shields.io/crates/v/accurate.svg?style=flat-square 24 | [crate]: https://crates.io/crates/accurate 25 | 26 | ## Usage 27 | 28 | Add the `accurate` crate as a dependency in your `Cargo.toml`: 29 | 30 | ```toml 31 | [dependencies] 32 | accurate = "0.4" 33 | ``` 34 | 35 | Then use it in your program like this: 36 | 37 | ```rust 38 | extern crate accurate; 39 | 40 | use accurate::traits::*; 41 | use accurate::sum::Sum2; 42 | 43 | fn main() { 44 | let x = vec![1.0, 2.0, 3.0]; 45 | let s = x.sum_with_accumulator::<Sum2<_>>(); 46 | assert_eq!(6.0f64, s); 47 | } 48 | ``` 49 | 50 | ## Documentation 51 | 52 | Documentation for the latest version of the crate is [on docs.rs][doc]. 53 | 54 | ## License 55 | 56 | Licensed under either of 57 | 58 | * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 59 | * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) 60 | 61 | at your option. 62 | 63 | ### Contribution 64 | 65 | Unless you explicitly state otherwise, any contribution intentionally 66 | submitted for inclusion in the work by you, as defined in the Apache-2.0 67 | license, shall be dual licensed as above, without any additional terms or 68 | conditions. 
69 | -------------------------------------------------------------------------------- /benches/criterion.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | extern crate criterion; 3 | extern crate num; 4 | extern crate rand; 5 | 6 | #[cfg(feature = "parallel")] 7 | extern crate rayon; 8 | 9 | extern crate accurate; 10 | 11 | use std::ops::AddAssign; 12 | 13 | use criterion::{Bencher, BenchmarkId, Criterion, Throughput}; 14 | 15 | use num::Float; 16 | 17 | use rand::distributions::Standard; 18 | use rand::prelude::*; 19 | 20 | #[cfg(feature = "parallel")] 21 | use rayon::prelude::*; 22 | 23 | use accurate::dot::{Dot2, Dot3, Dot4, Dot5, Dot6, Dot7, Dot8, Dot9, NaiveDot, OnlineExactDot}; 24 | use accurate::sum::{ 25 | Kahan, Klein, NaiveSum, Neumaier, OnlineExactSum, Sum2, Sum3, Sum4, Sum5, Sum6, Sum7, Sum8, 26 | Sum9, 27 | }; 28 | use accurate::traits::*; 29 | 30 | fn mk_vec(n: usize) -> Vec 31 | where 32 | Standard: Distribution, 33 | { 34 | let rng = rand::thread_rng(); 35 | rng.sample_iter::(&Standard).take(n).collect() 36 | } 37 | 38 | fn regular_add(b: &mut Bencher, n: &usize) 39 | where 40 | F: Float, 41 | Standard: Distribution, 42 | { 43 | let d = mk_vec::(*n); 44 | b.iter(|| { 45 | let mut s = F::zero(); 46 | for &x in &d { 47 | s = s + x; 48 | } 49 | criterion::black_box(s); 50 | }); 51 | } 52 | 53 | fn regular_add_assign(b: &mut Bencher, n: &usize) 54 | where 55 | F: Float + AddAssign, 56 | Standard: Distribution, 57 | { 58 | let d = mk_vec::(*n); 59 | b.iter(|| { 60 | let mut s = F::zero(); 61 | for &x in &d { 62 | s += x; 63 | } 64 | criterion::black_box(s); 65 | }); 66 | } 67 | 68 | fn fold(b: &mut Bencher, n: &usize) 69 | where 70 | F: Float, 71 | Standard: Distribution, 72 | { 73 | let d = mk_vec::(*n); 74 | b.iter(|| { 75 | let s = d.iter().fold(F::zero(), |acc, &x| acc + x); 76 | criterion::black_box(s); 77 | }); 78 | } 79 | 80 | fn sum_with(b: &mut Bencher, n: &usize) 81 | where 82 | F: Float, 
83 | Acc: SumAccumulator, 84 | Standard: Distribution, 85 | { 86 | let d = mk_vec::(*n); 87 | b.iter(|| { 88 | let s = d.iter().cloned().sum_with_accumulator::(); 89 | criterion::black_box(s); 90 | }); 91 | } 92 | 93 | fn regular_dot(b: &mut Bencher, n: &usize) 94 | where 95 | F: Float, 96 | Standard: Distribution, 97 | { 98 | let xs = mk_vec::(*n); 99 | let ys = mk_vec::(*n); 100 | 101 | b.iter(|| { 102 | let mut d = F::zero(); 103 | for (&x, &y) in xs.iter().zip(ys.iter()) { 104 | d = d + x * y 105 | } 106 | criterion::black_box(d); 107 | }); 108 | } 109 | 110 | fn regular_dot_assign(b: &mut Bencher, n: &usize) 111 | where 112 | F: Float + AddAssign, 113 | Standard: Distribution, 114 | { 115 | let xs = mk_vec::(*n); 116 | let ys = mk_vec::(*n); 117 | 118 | b.iter(|| { 119 | let mut d = F::zero(); 120 | for (&x, &y) in xs.iter().zip(ys.iter()) { 121 | d += x * y 122 | } 123 | criterion::black_box(d); 124 | }); 125 | } 126 | 127 | fn dot_fold(b: &mut Bencher, n: &usize) 128 | where 129 | F: Float, 130 | Standard: Distribution, 131 | { 132 | let xs = mk_vec::(*n); 133 | let ys = mk_vec::(*n); 134 | 135 | b.iter(|| { 136 | let d = xs 137 | .iter() 138 | .zip(ys.iter()) 139 | .fold(F::zero(), |acc, (&x, &y)| acc + x * y); 140 | criterion::black_box(d); 141 | }); 142 | } 143 | 144 | fn dot_with(b: &mut Bencher, n: &usize) 145 | where 146 | F: Float, 147 | Acc: DotAccumulator, 148 | Standard: Distribution, 149 | { 150 | let xs = mk_vec::(*n); 151 | let ys = mk_vec::(*n); 152 | 153 | b.iter(|| { 154 | let d = xs 155 | .iter() 156 | .cloned() 157 | .zip(ys.iter().cloned()) 158 | .dot_with_accumulator::(); 159 | criterion::black_box(d); 160 | }); 161 | } 162 | 163 | #[cfg(feature = "parallel")] 164 | fn parallel_sum_with(b: &mut Bencher, n: &usize) 165 | where 166 | F: Float + Copy + Send + Sync, 167 | Acc: ParallelSumAccumulator, 168 | Standard: Distribution, 169 | { 170 | let d = mk_vec::(*n); 171 | b.iter(|| { 172 | let s = d 173 | .par_iter() 174 | .map(|&x| x) 175 | 
.parallel_sum_with_accumulator::(); 176 | criterion::black_box(s); 177 | }); 178 | } 179 | 180 | #[cfg(feature = "parallel")] 181 | fn parallel_dot_with(b: &mut Bencher, n: &usize) 182 | where 183 | F: Float + Copy + Send + Sync, 184 | Acc: ParallelDotAccumulator, 185 | Standard: Distribution, 186 | { 187 | let xs = mk_vec::(*n); 188 | let ys = mk_vec::(*n); 189 | 190 | b.iter(|| { 191 | let d = xs 192 | .par_iter() 193 | .zip(ys.par_iter()) 194 | .map(|(&x, &y)| (x, y)) 195 | .parallel_dot_with_accumulator::(); 196 | criterion::black_box(d); 197 | }); 198 | } 199 | 200 | macro_rules! bench1 { 201 | ($c:expr, $name:expr, $f:ident, { $($tf:ty),* }) => { 202 | $(_bench($c, concat!($name, " on ", stringify!($tf)), $f::<$tf>);)* 203 | } 204 | } 205 | 206 | macro_rules! bench2 { 207 | ($c:expr, $name:expr, $f:ident, { $($tacc:ty),* }, $tfs:tt) => { 208 | $(bench2_aux! { $c, $name, $f, $tacc, $tfs })* 209 | } 210 | } 211 | 212 | macro_rules! bench2_aux { 213 | ($c:expr, $name:expr, $f:ident, $tacc:ty, { $($tf:ty),* }) => { 214 | $(_bench($c, concat!($name, " with ", stringify!($tacc), " on ", stringify!($tf)), $f::<$tacc, $tf>);)* 215 | } 216 | } 217 | 218 | fn _bench(c: &mut Criterion, id: &str, f: fn(&mut Bencher, &usize)) { 219 | let mut group = c.benchmark_group("all"); 220 | 221 | for size in [1_000, 10_000, 100_000, 1_000_000].iter() { 222 | group.throughput(Throughput::Elements(*size as u64)); 223 | group.bench_with_input(BenchmarkId::new(id, *size), size, f); 224 | } 225 | 226 | group.finish(); 227 | } 228 | 229 | #[cfg(feature = "parallel")] 230 | fn bench_parallel(c: &mut Criterion) { 231 | bench2! { 232 | c, 233 | "parallel sum", 234 | parallel_sum_with, 235 | { 236 | NaiveSum<_>, 237 | Kahan<_>, Neumaier<_>, Klein<_>, 238 | Sum2<_>, Sum3<_>, Sum4<_>, Sum5<_>, Sum6<_>, Sum7<_>, Sum8<_>, Sum9<_>, 239 | OnlineExactSum<_> 240 | }, 241 | { f32, f64 } 242 | } 243 | 244 | bench2! 
{ 245 | c, 246 | "parallel dot", 247 | parallel_dot_with, 248 | { 249 | NaiveDot<_>, 250 | Dot2<_>, Dot3<_>, Dot4<_>, Dot5<_>, Dot6<_>, Dot7<_>, Dot8<_>, Dot9<_>, 251 | OnlineExactDot<_> 252 | }, 253 | { f32, f64 } 254 | } 255 | } 256 | 257 | #[cfg(not(feature = "parallel"))] 258 | fn bench_parallel(_: &mut Criterion) {} 259 | 260 | fn bench_serial(c: &mut Criterion) { 261 | bench1! { c, "add", regular_add, { f32, f64 } } 262 | bench1! { c, "add assign", regular_add_assign, { f32, f64 } } 263 | bench1! { c, "fold", fold, { f32, f64 } } 264 | 265 | bench2! { 266 | c, 267 | "sum", 268 | sum_with, 269 | { 270 | NaiveSum<_>, 271 | Kahan<_>, Neumaier<_>, Klein<_>, 272 | Sum2<_>, Sum3<_>, Sum4<_>, Sum5<_>, Sum6<_>, Sum7<_>, Sum8<_>, Sum9<_>, 273 | OnlineExactSum<_> 274 | }, 275 | { f32, f64 } 276 | } 277 | 278 | bench1! { c, "dot", regular_dot, { f32, f64 } } 279 | bench1! { c, "dot assign", regular_dot_assign, { f32, f64 } } 280 | bench1! { c, "dot with fold", dot_fold, { f32, f64 } } 281 | 282 | bench2! 
{ 283 | c, 284 | "dot", 285 | dot_with, 286 | { 287 | NaiveDot<_>, 288 | Dot2<_>, Dot3<_>, Dot4<_>, Dot5<_>, Dot6<_>, Dot7<_>, Dot8<_>, Dot9<_>, 289 | OnlineExactDot<_> 290 | }, 291 | { f32, f64 } 292 | } 293 | } 294 | 295 | criterion_group!(serial, bench_serial); 296 | criterion_group!(parallel, bench_parallel); 297 | criterion_main!(serial, parallel); 298 | -------------------------------------------------------------------------------- /examples/testsuite.rs: -------------------------------------------------------------------------------- 1 | extern crate criterion_plot; 2 | extern crate num; 3 | extern crate rand; 4 | extern crate rayon; 5 | extern crate rug; 6 | 7 | extern crate accurate; 8 | 9 | use std::io; 10 | use std::io::prelude::*; 11 | use std::path::Path; 12 | 13 | use criterion_plot::prelude::*; 14 | 15 | use rug::Float as BigFloat; 16 | 17 | use num::{Float, Integer, ToPrimitive}; 18 | 19 | use rand::Rng; 20 | 21 | use rayon::prelude::*; 22 | 23 | use accurate::dot::{Dot2, Dot3, Dot4, Dot5, Dot6, Dot7, Dot8, Dot9, NaiveDot, OnlineExactDot}; 24 | use accurate::sum::{ 25 | Kahan, Klein, NaiveSum, Neumaier, OnlineExactSum, Sum2, Sum3, Sum4, Sum5, Sum6, Sum7, Sum8, 26 | Sum9, 27 | }; 28 | use accurate::traits::*; 29 | use accurate::util::two_product; 30 | 31 | type F = f64; 32 | 33 | fn dot_exact(iter: Iter) -> F 34 | where 35 | Iter: Iterator, 36 | { 37 | let mut acc = BigFloat::new(2048); 38 | 39 | for (x, y) in iter { 40 | let a = BigFloat::with_val(2048, x); 41 | let b = BigFloat::with_val(2048, y); 42 | let c = a * b; 43 | acc = acc + c; 44 | } 45 | 46 | F::from(acc.to_f64()) 47 | } 48 | 49 | fn sum_exact(iter: Iter) -> F 50 | where 51 | Iter: Iterator, 52 | { 53 | let mut acc = BigFloat::new(2048); 54 | 55 | for x in iter { 56 | acc = acc + BigFloat::with_val(2048, x); 57 | } 58 | 59 | F::from(acc.to_f64()) 60 | } 61 | 62 | fn gendot2(n: usize, cnd: F) -> (Vec, Vec, F, F) { 63 | let m = (n / 2).to_i32().unwrap(); 64 | let eps = (-24.0).exp2(); 65 
| let l = (cnd.log2() / -(eps.log2())).floor().to_i32().unwrap(); 66 | 67 | let mut rng = rand::thread_rng(); 68 | let mut x; 69 | let mut y; 70 | 71 | if n.mod_floor(&2) == 0 { 72 | let c = (1..m - 1) 73 | .map(|i| { 74 | let r = if l > 0 { i.mod_floor(&l) } else { 1 }; 75 | rng.gen::() * eps.powi(r) 76 | }) 77 | .collect::>(); 78 | 79 | x = vec![1.0]; 80 | x.extend_from_slice(&c); 81 | x.push(0.5 / cnd); 82 | x.push(-1.0); 83 | x.extend(c.into_iter().map(|x| -x)); 84 | x.push(0.5 / cnd); 85 | 86 | let b = (1..m - 1).map(|_| rng.gen()).collect::>(); 87 | 88 | y = vec![1.0]; 89 | y.extend_from_slice(&b); 90 | y.push(1.0); 91 | y.push(1.0); 92 | y.extend_from_slice(&b); 93 | y.push(1.0); 94 | } else { 95 | let c = (1..m) 96 | .map(|i| { 97 | let r = i.mod_floor(&l); 98 | rng.gen::() * eps.powi(r) 99 | }) 100 | .collect::>(); 101 | 102 | x = vec![1.0]; 103 | x.extend_from_slice(&c); 104 | x.push(1.0 / cnd); 105 | x.push(-1.0); 106 | x.extend(c.into_iter().map(|x| -x)); 107 | 108 | let b = (1..m).map(|_| rng.gen()).collect::>(); 109 | 110 | y = vec![1.0]; 111 | y.extend_from_slice(&b); 112 | y.push(1.0); 113 | y.push(1.0); 114 | y.extend_from_slice(&b); 115 | } 116 | 117 | assert_eq!(x.len(), n); 118 | assert_eq!(x.len(), y.len()); 119 | 120 | let d = dot_exact(x.iter().cloned().zip(y.iter().cloned())); 121 | let absd = dot_exact( 122 | x.iter() 123 | .cloned() 124 | .map(|x| x.abs()) 125 | .zip(y.iter().cloned().map(|x| x.abs())), 126 | ); 127 | let c = 2.0 * absd / d.abs(); 128 | 129 | (x, y, d, c) 130 | } 131 | 132 | fn gensum(n: usize, cnd: F) -> (Vec, F, F) { 133 | let (x, y, _, _) = gendot2(n / 2, cnd); 134 | let mut z = vec![]; 135 | for (x, y) in x.into_iter().zip(y.into_iter()) { 136 | let (a, b) = two_product(x, y); 137 | z.push(a); 138 | z.push(b); 139 | } 140 | let s = sum_exact(z.iter().cloned()); 141 | let c = sum_exact(z.iter().cloned().map(|x| x.abs())) / s.abs(); 142 | (z, s, c) 143 | } 144 | 145 | fn gen_dots() -> (Vec>, Vec>, Vec, Vec) { 146 | 
println!("Generating dot products."); 147 | let mut xs = vec![]; 148 | let mut ys = vec![]; 149 | let mut ds = vec![]; 150 | let mut cs = vec![]; 151 | 152 | let emax = 300; 153 | for e in 0..emax + 1 { 154 | print!("Working on exponent {} of {}", e, emax); 155 | for _ in 0..4 { 156 | print!("."); 157 | io::stdout().flush().unwrap(); 158 | let (x, y, d, c) = gendot2(1000, 10.0.powi(e)); 159 | xs.push(x); 160 | ys.push(y); 161 | ds.push(d); 162 | cs.push(c); 163 | } 164 | println!(" done."); 165 | } 166 | 167 | (xs, ys, ds, cs) 168 | } 169 | 170 | fn gen_sums() -> (Vec>, Vec, Vec) { 171 | println!("Generating sums."); 172 | let mut zs = vec![]; 173 | let mut ss = vec![]; 174 | let mut cs = vec![]; 175 | 176 | let emax = 280; 177 | for e in 0..emax + 1 { 178 | print!("Working on exponent {} of {}", e, emax); 179 | for _ in 0..4 { 180 | print!("."); 181 | io::stdout().flush().unwrap(); 182 | let (z, s, c) = gensum(2000, 10.0.powi(e)); 183 | zs.push(z); 184 | ss.push(s); 185 | cs.push(c); 186 | } 187 | println!(" done."); 188 | } 189 | 190 | (zs, ss, cs) 191 | } 192 | 193 | fn beautify(name: &str) -> &str { 194 | name.trim_matches(|c: char| !c.is_alphanumeric()) 195 | } 196 | 197 | fn make_figure(filename: &'static str, title: &'static str) -> Figure { 198 | let mut f = Figure::new(); 199 | f.set(Title(title)) 200 | .set(Output(Path::new(filename))) 201 | .configure(Axis::BottomX, |a| { 202 | a.set(Label("condition number")) 203 | .set(Scale::Logarithmic) 204 | .set(Range::Limits(1.0e-10, 1.0e305)) 205 | }) 206 | .configure(Axis::LeftY, |a| { 207 | a.set(Label("relative error")) 208 | .set(Scale::Logarithmic) 209 | .set(Range::Limits(1.0e-17, 10.0)) 210 | }) 211 | .configure(Key, |k| { 212 | k.set(Position::Inside(Vertical::Center, Horizontal::Right)) 213 | .set(Title("")) 214 | }); 215 | f 216 | } 217 | 218 | fn draw_figure(mut figure: Figure) { 219 | assert!(figure 220 | .draw() 221 | .expect("could not execute gnuplot") 222 | .wait_with_output() 223 | .expect("could 
not wait on gnuplot") 224 | .status 225 | .success()); 226 | } 227 | 228 | fn make_color() -> Color { 229 | let mut rng = rand::thread_rng(); 230 | Color::Rgb(rng.gen(), rng.gen(), rng.gen()) 231 | } 232 | 233 | fn plot(figure: &mut Figure, label: &'static str, xs: &[F], ys: &[F]) { 234 | figure.plot( 235 | Points { 236 | x: &xs[..], 237 | y: &ys[..], 238 | }, 239 | |l| { 240 | l.set(Label(label)) 241 | .set(PointType::FilledCircle) 242 | .set(PointSize(0.2)) 243 | .set(make_color()) 244 | }, 245 | ); 246 | } 247 | 248 | macro_rules! dot { 249 | ($filename:expr, $title:expr, ($xs:expr, $ys:expr, $ds:expr, $cs:expr), $($acct:path),*) => { 250 | let mut figure = make_figure($filename, $title); 251 | $(dot_::<$acct>(&mut figure, beautify(stringify!($acct)), $xs, $ys, $ds, $cs);)* 252 | draw_figure(figure); 253 | } 254 | } 255 | 256 | fn dot_( 257 | figure: &mut Figure, 258 | name: &'static str, 259 | xs: &[Vec], 260 | ys: &[Vec], 261 | ds: &[F], 262 | cs: &[F], 263 | ) where 264 | Acc: DotAccumulator, 265 | { 266 | print!("Testing dot product with `{}`...", name); 267 | let mut drawcs = vec![]; 268 | let mut drawes = vec![]; 269 | for i in 0..xs.len() { 270 | let d = xs[i] 271 | .iter() 272 | .cloned() 273 | .zip(ys[i].iter().cloned()) 274 | .dot_with_accumulator::(); 275 | let e = ((d - ds[i]).abs() / ds[i].abs()).min(1.0).max(1.0e-16); 276 | if e != drawes.last().cloned().unwrap_or(-1.0) 277 | || cs[i] >= 1000.0 * drawcs.last().cloned().unwrap_or(-1.0) 278 | { 279 | drawcs.push(cs[i]); 280 | drawes.push(e); 281 | } 282 | } 283 | plot(figure, name, &drawcs[..], &drawes[..]); 284 | println!(" done."); 285 | } 286 | 287 | macro_rules! 
parallel_dot { 288 | ($filename:expr, $title:expr, ($xs:expr, $ys:expr, $ds:expr, $cs:expr), $($acct:path),*) => { 289 | let mut figure = make_figure($filename, $title); 290 | $(parallel_dot_::<$acct>(&mut figure, beautify(stringify!($acct)), $xs, $ys, $ds, $cs);)* 291 | draw_figure(figure); 292 | } 293 | } 294 | 295 | fn parallel_dot_( 296 | figure: &mut Figure, 297 | name: &'static str, 298 | xs: &[Vec], 299 | ys: &[Vec], 300 | ds: &[F], 301 | cs: &[F], 302 | ) where 303 | Acc: ParallelDotAccumulator, 304 | { 305 | print!("Testing parallel dot with `{}`...", name); 306 | let mut drawcs = vec![]; 307 | let mut drawes = vec![]; 308 | for i in 0..xs.len() { 309 | let d = xs[i] 310 | .par_iter() 311 | .zip(ys[i].par_iter()) 312 | .map(|(&x, &y)| (x, y)) 313 | .parallel_dot_with_accumulator::(); 314 | let e = ((d - ds[i]).abs() / ds[i].abs()).min(1.0).max(1.0e-16); 315 | if e != drawes.last().cloned().unwrap_or(-1.0) 316 | || cs[i] >= 1000.0 * drawcs.last().cloned().unwrap_or(-1.0) 317 | { 318 | drawcs.push(cs[i]); 319 | drawes.push(e); 320 | } 321 | } 322 | plot(figure, name, &drawcs[..], &drawes[..]); 323 | println!(" done."); 324 | } 325 | 326 | macro_rules! 
sum { 327 | ($filename:expr, $title:expr, ($zs:expr, $ds:expr, $cs:expr), $($acct:path),*) => { 328 | let mut figure = make_figure($filename, $title); 329 | $(sum_::<$acct>(&mut figure, beautify(stringify!($acct)), $zs, $ds, $cs);)* 330 | draw_figure(figure); 331 | } 332 | } 333 | 334 | fn sum_(figure: &mut Figure, name: &'static str, zs: &[Vec], ds: &[F], cs: &[F]) 335 | where 336 | Acc: SumAccumulator, 337 | { 338 | print!("Testing sum with `{}`...", name); 339 | let mut drawcs = vec![]; 340 | let mut drawes = vec![]; 341 | for i in 0..zs.len() { 342 | let d = zs[i].iter().cloned().sum_with_accumulator::(); 343 | let e = ((d - ds[i]).abs() / ds[i].abs()).min(1.0).max(1.0e-16); 344 | if e != drawes.last().cloned().unwrap_or(-1.0) 345 | || cs[i] >= 1000.0 * drawcs.last().cloned().unwrap_or(-1.0) 346 | { 347 | drawcs.push(cs[i]); 348 | drawes.push(e); 349 | } 350 | } 351 | plot(figure, name, &drawcs[..], &drawes[..]); 352 | println!(" done."); 353 | } 354 | 355 | macro_rules! parallel_sum { 356 | ($filename:expr, $title:expr, ($zs:expr, $ds:expr, $cs:expr), $($acct:path),*) => { 357 | let mut figure = make_figure($filename, $title); 358 | $(parallel_sum_::<$acct>(&mut figure, beautify(stringify!($acct)), $zs, $ds, $cs);)* 359 | draw_figure(figure); 360 | } 361 | } 362 | 363 | fn parallel_sum_(figure: &mut Figure, name: &'static str, zs: &[Vec], ds: &[F], cs: &[F]) 364 | where 365 | Acc: ParallelSumAccumulator, 366 | { 367 | print!("Testing parallel sum with `{}`...", name); 368 | let mut drawcs = vec![]; 369 | let mut drawes = vec![]; 370 | for i in 0..zs.len() { 371 | let d = zs[i] 372 | .par_iter() 373 | .map(|&x| x) 374 | .parallel_sum_with_accumulator::(); 375 | let e = ((d - ds[i]).abs() / ds[i].abs()).min(1.0).max(1.0e-16); 376 | if e != drawes.last().cloned().unwrap_or(-1.0) 377 | || cs[i] >= 1000.0 * drawcs.last().cloned().unwrap_or(-1.0) 378 | { 379 | drawcs.push(cs[i]); 380 | drawes.push(e); 381 | } 382 | } 383 | plot(figure, name, &drawcs[..], 
&drawes[..]); 384 | println!(" done."); 385 | } 386 | 387 | fn main() { 388 | let (xs, ys, ds, cs) = gen_dots(); 389 | 390 | dot! { 391 | "NaiveDot.svg", 392 | "NaiveDot, double precision", 393 | (&xs, &ys, &ds, &cs), 394 | NaiveDot<_> 395 | } 396 | dot! { 397 | "DotK.svg", 398 | "DotK for K = 2...9, double precision", 399 | (&xs, &ys, &ds, &cs), 400 | Dot2<_>, Dot3<_>, Dot4<_>, Dot5<_>, Dot6<_>, Dot7<_>, Dot8<_>, Dot9<_> 401 | } 402 | dot! { 403 | "OnlineExactDot.svg", 404 | "OnlineExactDot, double precision", 405 | (&xs, &ys, &ds, &cs), 406 | OnlineExactDot<_> 407 | }; 408 | 409 | parallel_dot! { 410 | "ParallelNaiveDot.svg", 411 | "Parallel NaiveDot, double precision", 412 | (&xs, &ys, &ds, &cs), 413 | NaiveDot<_> 414 | }; 415 | parallel_dot! { 416 | "ParallelDotK.svg", 417 | "Parallel DotK for K = 2...9, double precision", 418 | (&xs, &ys, &ds, &cs), 419 | Dot2<_>, Dot3<_>, Dot4<_>, Dot5<_>, Dot6<_>, Dot7<_>, Dot8<_>, Dot9<_> 420 | }; 421 | parallel_dot! { 422 | "ParallelOnlineExactDot.svg", 423 | "Parallel OnlineExactDot, double precision", 424 | (&xs, &ys, &ds, &cs), 425 | OnlineExactDot<_> 426 | }; 427 | 428 | let (zs, ds, cs) = gen_sums(); 429 | 430 | sum! { 431 | "NaiveSum.svg", 432 | "NaiveSum, double precision", 433 | (&zs, &ds, &cs), 434 | NaiveSum<_> 435 | }; 436 | sum! { 437 | "Kahan.svg", 438 | "Kahan, Neumaier, and Klein summation, double precision", 439 | (&zs, &ds, &cs), 440 | NaiveSum<_>, Kahan<_>, Neumaier<_>, Klein<_> 441 | }; 442 | sum! { 443 | "SumK.svg", 444 | "SumK for K = 2...9, double precision", 445 | (&zs, &ds, &cs), 446 | NaiveSum<_>, Sum2<_>, Sum3<_>, Sum4<_>, Sum5<_>, Sum6<_>, Sum7<_>, Sum8<_>, Sum9<_> 447 | }; 448 | sum! { 449 | "OnlineExactSum.svg", 450 | "OnlineExactSum, double precision", 451 | (&zs, &ds, &cs), 452 | OnlineExactSum<_> 453 | }; 454 | 455 | parallel_sum! { 456 | "ParallelNaiveSum.svg", 457 | "Parallel NaiveSum, double precision", 458 | (&zs, &ds, &cs), 459 | NaiveSum<_> 460 | }; 461 | parallel_sum! 
{ 462 | "ParallelKahan.svg", 463 | "Parallel Kahan, Neumaier, and Klein summation, double precision", 464 | (&zs, &ds, &cs), 465 | NaiveSum<_>, Kahan<_>, Neumaier<_>, Klein<_> 466 | }; 467 | parallel_sum! { 468 | "ParallelSumK.svg", 469 | "Parallel SumK for K = 2...9, double precision", 470 | (&zs, &ds, &cs), 471 | NaiveSum<_>, Sum2<_>, Sum3<_>, Sum4<_>, Sum5<_>, Sum6<_>, Sum7<_>, Sum8<_>, Sum9<_> 472 | }; 473 | parallel_sum! { 474 | "ParallelOnlineExactSum.svg", 475 | "Parallel OnlineExactSum, double precision", 476 | (&zs, &ds, &cs), 477 | OnlineExactSum<_> 478 | }; 479 | } 480 | -------------------------------------------------------------------------------- /images/Kahan.svg: -------------------------------------------------------------------------------- 1 | Produced by GNUPLOT 6.0 patchlevel 1 1x10-16 1x10-14 1x10-12 1x10-10 1x10-8 1x10-6 0.0001 0.01 1 1 1x1050 1x10100 1x10150 1x10200 1x10250 1x10300NaiveSumKahanNeumaierKleinrelative errorcondition numberKahan, Neumaier, and Klein summation, double precision -------------------------------------------------------------------------------- /images/NaiveDot.svg: -------------------------------------------------------------------------------- 1 | Produced by GNUPLOT 6.0 patchlevel 1 1x10-16 1x10-14 1x10-12 1x10-10 1x10-8 1x10-6 0.0001 0.01 1 1 1x1050 1x10100 1x10150 1x10200 1x10250 1x10300NaiveDotrelative errorcondition numberNaiveDot, double precision -------------------------------------------------------------------------------- /images/NaiveSum.svg: -------------------------------------------------------------------------------- 1 | Produced by GNUPLOT 6.0 patchlevel 1 1x10-16 1x10-14 1x10-12 1x10-10 1x10-8 1x10-6 0.0001 0.01 1 1 1x1050 1x10100 1x10150 1x10200 1x10250 1x10300NaiveSumrelative errorcondition numberNaiveSum, double precision -------------------------------------------------------------------------------- /images/OnlineExactDot.svg: 
-------------------------------------------------------------------------------- 1 | Produced by GNUPLOT 6.0 patchlevel 1 1x10-16 1x10-14 1x10-12 1x10-10 1x10-8 1x10-6 0.0001 0.01 1 1 1x1050 1x10100 1x10150 1x10200 1x10250 1x10300OnlineExactDotrelative errorcondition numberOnlineExactDot, double precision -------------------------------------------------------------------------------- /images/OnlineExactSum.svg: -------------------------------------------------------------------------------- 1 | Produced by GNUPLOT 6.0 patchlevel 1 1x10-16 1x10-14 1x10-12 1x10-10 1x10-8 1x10-6 0.0001 0.01 1 1 1x1050 1x10100 1x10150 1x10200 1x10250 1x10300OnlineExactSumrelative errorcondition numberOnlineExactSum, double precision -------------------------------------------------------------------------------- /src/dot.rs: -------------------------------------------------------------------------------- 1 | //! Algorithms for dot product 2 | 3 | pub mod traits; 4 | 5 | mod dotk; 6 | mod naive; 7 | mod onlineexactdot; 8 | 9 | pub use self::dotk::{Dot2, Dot3, Dot4, Dot5, Dot6, Dot7, Dot8, Dot9, DotK}; 10 | pub use self::naive::NaiveDot; 11 | pub use self::onlineexactdot::OnlineExactDot; 12 | 13 | #[cfg(feature = "parallel")] 14 | use num_traits::Zero; 15 | 16 | #[cfg(feature = "parallel")] 17 | use rayon::iter::plumbing::{Consumer, Folder, UnindexedConsumer}; 18 | 19 | #[cfg(feature = "parallel")] 20 | use self::traits::DotAccumulator; 21 | #[cfg(feature = "parallel")] 22 | use self::traits::ParallelDotAccumulator; 23 | #[cfg(feature = "parallel")] 24 | use crate::util::AddReducer; 25 | 26 | /// Adapts a `DotAccumulator` into a `Folder` 27 | #[cfg(feature = "parallel")] 28 | #[derive(Copy, Clone, Debug)] 29 | pub struct DotFolder(Acc); 30 | 31 | #[cfg(feature = "parallel")] 32 | impl Folder<(F, F)> for DotFolder 33 | where 34 | Acc: DotAccumulator, 35 | { 36 | type Result = Acc; 37 | 38 | #[inline] 39 | fn consume(self, item: (F, F)) -> Self { 40 | DotFolder(self.0 + item) 41 | } 42 | 
43 | #[inline] 44 | fn complete(self) -> Self::Result { 45 | self.0 46 | } 47 | 48 | #[inline] 49 | fn full(&self) -> bool { 50 | false 51 | } 52 | } 53 | 54 | /// Adapts a `ParallelDotAccumulator` into a `Consumer` 55 | #[cfg(feature = "parallel")] 56 | #[derive(Copy, Clone, Debug)] 57 | pub struct DotConsumer(Acc); 58 | 59 | #[cfg(feature = "parallel")] 60 | impl Consumer<(F, F)> for DotConsumer 61 | where 62 | Acc: ParallelDotAccumulator, 63 | F: Zero + Send, 64 | { 65 | type Folder = DotFolder; 66 | type Reducer = AddReducer; 67 | type Result = Acc; 68 | 69 | #[inline] 70 | fn split_at(self, _index: usize) -> (Self, Self, Self::Reducer) { 71 | (self, Acc::zero().into_consumer(), AddReducer) 72 | } 73 | 74 | #[inline] 75 | fn into_folder(self) -> Self::Folder { 76 | DotFolder(self.0) 77 | } 78 | 79 | #[inline] 80 | fn full(&self) -> bool { 81 | false 82 | } 83 | } 84 | 85 | #[cfg(feature = "parallel")] 86 | impl UnindexedConsumer<(F, F)> for DotConsumer 87 | where 88 | Acc: ParallelDotAccumulator, 89 | F: Zero + Send, 90 | { 91 | #[inline] 92 | fn split_off_left(&self) -> Self { 93 | Acc::zero().into_consumer() 94 | } 95 | 96 | #[inline] 97 | fn to_reducer(&self) -> Self::Reducer { 98 | AddReducer 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /src/dot/dotk.rs: -------------------------------------------------------------------------------- 1 | //! The `DotK` algorithm 2 | 3 | use std::ops::Add; 4 | 5 | #[cfg(doc)] 6 | use embed_doc_image::embed_doc_image; 7 | 8 | use num_traits::Float; 9 | 10 | use super::traits::DotAccumulator; 11 | use crate::sum::traits::SumAccumulator; 12 | use crate::sum::{NaiveSum, Sum2, Sum3, Sum4, Sum5, Sum6, Sum7, Sum8}; 13 | use crate::util::traits::TwoProduct; 14 | use crate::util::{two_product, two_sum}; 15 | 16 | /// Calculates a dot product using both product transformation and cascaded accumulators 17 | /// 18 | /// See also `Dot2`... `Dot9`. 
19 | /// 20 | /// ![][DotK] 21 | /// 22 | /// # References 23 | /// 24 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 25 | #[cfg_attr(doc, embed_doc_image("DotK", "images/DotK.svg"))] 26 | #[derive(Copy, Clone, Debug)] 27 | pub struct DotK { 28 | p: F, 29 | r: R, 30 | } 31 | 32 | impl DotAccumulator for DotK 33 | where 34 | F: Float + TwoProduct, 35 | R: SumAccumulator, 36 | { 37 | #[inline] 38 | fn dot(self) -> F { 39 | (self.r + self.p).sum() 40 | } 41 | } 42 | 43 | impl Add<(F, F)> for DotK 44 | where 45 | F: TwoProduct, 46 | R: SumAccumulator, 47 | { 48 | type Output = Self; 49 | 50 | #[inline] 51 | fn add(self, (a, b): (F, F)) -> Self { 52 | let (h, r1) = two_product(a, b); 53 | let (p, r2) = two_sum(self.p, h); 54 | DotK { 55 | p, 56 | r: (self.r + r1) + r2, 57 | } 58 | } 59 | } 60 | 61 | impl From for DotK 62 | where 63 | F: Float, 64 | R: SumAccumulator, 65 | { 66 | fn from(x: F) -> Self { 67 | DotK { p: x, r: R::zero() } 68 | } 69 | } 70 | 71 | impl Add for DotK 72 | where 73 | F: Float, 74 | R: SumAccumulator, 75 | R::Output: Add, 76 | { 77 | type Output = Self; 78 | 79 | #[inline] 80 | fn add(self, rhs: Self) -> Self::Output { 81 | let (p, r) = two_sum(self.p, rhs.p); 82 | DotK { 83 | p, 84 | r: (self.r + r) + rhs.r, 85 | } 86 | } 87 | } 88 | 89 | unsafe impl Send for DotK 90 | where 91 | F: Send, 92 | R: Send, 93 | { 94 | } 95 | 96 | /// `DotK` with two cascaded accumulators 97 | /// 98 | /// ![][DotK] 99 | /// 100 | /// # Examples 101 | /// 102 | /// ``` 103 | /// use accurate::traits::*; 104 | /// use accurate::dot::Dot2; 105 | /// 106 | /// let d = Dot2::zero() + (1.0, 1.0) + (2.0, 2.0) + (3.0, 3.0); 107 | /// assert_eq!(14.0f64, d.dot()); 108 | /// ``` 109 | /// 110 | /// # References 111 | /// 112 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 113 | #[cfg_attr(doc, embed_doc_image("DotK", "images/DotK.svg"))] 114 | pub type Dot2 = DotK>; 115 | 116 | /// `DotK` with three cascaded 
accumulators 117 | /// 118 | /// ![][DotK] 119 | /// 120 | /// # Examples 121 | /// 122 | /// ``` 123 | /// use accurate::traits::*; 124 | /// use accurate::dot::Dot3; 125 | /// 126 | /// let d = Dot3::zero() + (1.0, 1.0) + (2.0, 2.0) + (3.0, 3.0); 127 | /// assert_eq!(14.0f64, d.dot()); 128 | /// ``` 129 | /// 130 | /// # References 131 | /// 132 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 133 | #[cfg_attr(doc, embed_doc_image("DotK", "images/DotK.svg"))] 134 | pub type Dot3 = DotK>; 135 | 136 | /// `DotK` with four cascaded accumulators 137 | /// 138 | /// ![][DotK] 139 | /// 140 | /// # Examples 141 | /// 142 | /// ``` 143 | /// use accurate::traits::*; 144 | /// use accurate::dot::Dot4; 145 | /// 146 | /// let d = Dot4::zero() + (1.0, 1.0) + (2.0, 2.0) + (3.0, 3.0); 147 | /// assert_eq!(14.0f64, d.dot()); 148 | /// ``` 149 | /// 150 | /// # References 151 | /// 152 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 153 | #[cfg_attr(doc, embed_doc_image("DotK", "images/DotK.svg"))] 154 | pub type Dot4 = DotK>; 155 | 156 | /// `DotK` with five cascaded accumulators 157 | /// 158 | /// ![][DotK] 159 | /// 160 | /// # Examples 161 | /// 162 | /// ``` 163 | /// use accurate::traits::*; 164 | /// use accurate::dot::Dot5; 165 | /// 166 | /// let d = Dot5::zero() + (1.0, 1.0) + (2.0, 2.0) + (3.0, 3.0); 167 | /// assert_eq!(14.0f64, d.dot()); 168 | /// ``` 169 | /// 170 | /// # References 171 | /// 172 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 173 | #[cfg_attr(doc, embed_doc_image("DotK", "images/DotK.svg"))] 174 | pub type Dot5 = DotK>; 175 | 176 | /// `DotK` with six cascaded accumulators 177 | /// 178 | /// ![][DotK] 179 | /// 180 | /// # Examples 181 | /// 182 | /// ``` 183 | /// use accurate::traits::*; 184 | /// use accurate::dot::Dot6; 185 | /// 186 | /// let d = Dot6::zero() + (1.0, 1.0) + (2.0, 2.0) + (3.0, 3.0); 187 | /// assert_eq!(14.0f64, d.dot()); 188 | /// 
``` 189 | /// 190 | /// # References 191 | /// 192 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 193 | #[cfg_attr(doc, embed_doc_image("DotK", "images/DotK.svg"))] 194 | pub type Dot6 = DotK>; 195 | 196 | /// `DotK` with seven cascaded accumulators 197 | /// 198 | /// ![][DotK] 199 | /// 200 | /// # Examples 201 | /// 202 | /// ``` 203 | /// use accurate::traits::*; 204 | /// use accurate::dot::Dot7; 205 | /// 206 | /// let d = Dot7::zero() + (1.0, 1.0) + (2.0, 2.0) + (3.0, 3.0); 207 | /// assert_eq!(14.0f64, d.dot()); 208 | /// ``` 209 | /// 210 | /// # References 211 | /// 212 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 213 | #[cfg_attr(doc, embed_doc_image("DotK", "images/DotK.svg"))] 214 | pub type Dot7 = DotK>; 215 | 216 | /// `DotK` with eight cascaded accumulators 217 | /// 218 | /// ![][DotK] 219 | /// 220 | /// # Examples 221 | /// 222 | /// ``` 223 | /// use accurate::traits::*; 224 | /// use accurate::dot::Dot8; 225 | /// 226 | /// let d = Dot8::zero() + (1.0, 1.0) + (2.0, 2.0) + (3.0, 3.0); 227 | /// assert_eq!(14.0f64, d.dot()); 228 | /// ``` 229 | /// 230 | /// # References 231 | /// 232 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 233 | #[cfg_attr(doc, embed_doc_image("DotK", "images/DotK.svg"))] 234 | pub type Dot8 = DotK>; 235 | 236 | /// `DotK` with nine cascaded accumulators 237 | /// 238 | /// ![][DotK] 239 | /// 240 | /// # Examples 241 | /// 242 | /// ``` 243 | /// use accurate::traits::*; 244 | /// use accurate::dot::Dot9; 245 | /// 246 | /// let d = Dot9::zero() + (1.0, 1.0) + (2.0, 2.0) + (3.0, 3.0); 247 | /// assert_eq!(14.0f64, d.dot()); 248 | /// ``` 249 | /// 250 | /// # References 251 | /// 252 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 253 | #[cfg_attr(doc, embed_doc_image("DotK", "images/DotK.svg"))] 254 | pub type Dot9 = DotK>; 255 | 
-------------------------------------------------------------------------------- /src/dot/naive.rs: -------------------------------------------------------------------------------- 1 | //! Naive floating point dot product 2 | 3 | use std::ops::Add; 4 | 5 | #[cfg(doc)] 6 | use embed_doc_image::embed_doc_image; 7 | 8 | use num_traits::Float; 9 | 10 | use super::traits::DotAccumulator; 11 | 12 | /// Naive floating point dot product 13 | /// 14 | /// ![][NaiveDot] 15 | /// 16 | /// # Examples 17 | /// 18 | /// ``` 19 | /// use accurate::traits::*; 20 | /// use accurate::dot::NaiveDot; 21 | /// 22 | /// let d = NaiveDot::zero() + (1.0, 1.0) + (2.0, 2.0) + (3.0, 3.0); 23 | /// assert_eq!(14.0f64, d.dot()); 24 | /// ``` 25 | #[cfg_attr(doc, embed_doc_image("NaiveDot", "images/NaiveDot.svg"))] 26 | #[derive(Copy, Clone, Debug)] 27 | pub struct NaiveDot(F); 28 | 29 | impl DotAccumulator for NaiveDot 30 | where 31 | F: Float, 32 | { 33 | #[inline] 34 | fn dot(self) -> F { 35 | self.0 36 | } 37 | } 38 | 39 | impl Add<(F, F)> for NaiveDot 40 | where 41 | F: Float, 42 | { 43 | type Output = Self; 44 | 45 | #[inline] 46 | fn add(self, rhs: (F, F)) -> Self::Output { 47 | NaiveDot(self.0 + rhs.0 * rhs.1) 48 | } 49 | } 50 | 51 | impl From for NaiveDot 52 | where 53 | F: Float, 54 | { 55 | fn from(x: F) -> Self { 56 | NaiveDot(x) 57 | } 58 | } 59 | 60 | impl Add for NaiveDot 61 | where 62 | F: Float, 63 | { 64 | type Output = Self; 65 | 66 | #[inline] 67 | fn add(self, rhs: Self) -> Self::Output { 68 | NaiveDot(self.0 + rhs.0) 69 | } 70 | } 71 | 72 | unsafe impl Send for NaiveDot where F: Send {} 73 | -------------------------------------------------------------------------------- /src/dot/onlineexactdot.rs: -------------------------------------------------------------------------------- 1 | //! 
`OnlineExactSum` for dot product 2 | 3 | use std::ops::Add; 4 | 5 | #[cfg(doc)] 6 | use embed_doc_image::embed_doc_image; 7 | 8 | use super::traits::DotAccumulator; 9 | use crate::sum::traits::SumAccumulator; 10 | use crate::sum::OnlineExactSum; 11 | use crate::util::traits::TwoProduct; 12 | use crate::util::two_product; 13 | 14 | /// Calculates the dot product using product transformation and `OnlineExactSum` 15 | /// 16 | /// ![][OnlineExactDot] 17 | /// 18 | /// # Examples 19 | /// 20 | /// ``` 21 | /// use accurate::traits::*; 22 | /// use accurate::dot::OnlineExactDot; 23 | /// 24 | /// let d = OnlineExactDot::zero() + (1.0, 1.0) + (2.0, 2.0) + (3.0, 3.0); 25 | /// assert_eq!(14.0f64, d.dot()); 26 | /// ``` 27 | #[cfg_attr(doc, embed_doc_image("OnlineExactDot", "images/OnlineExactDot.svg"))] 28 | #[derive(Clone, Debug)] 29 | pub struct OnlineExactDot { 30 | s: OnlineExactSum, 31 | } 32 | 33 | impl DotAccumulator for OnlineExactDot 34 | where 35 | F: TwoProduct, 36 | OnlineExactSum: SumAccumulator, 37 | { 38 | fn zero() -> Self { 39 | OnlineExactDot::from(F::zero()) 40 | } 41 | 42 | #[inline] 43 | fn dot(self) -> F { 44 | self.s.sum() 45 | } 46 | } 47 | 48 | impl Add<(F, F)> for OnlineExactDot 49 | where 50 | F: TwoProduct, 51 | OnlineExactSum: SumAccumulator, 52 | { 53 | type Output = Self; 54 | 55 | #[inline] 56 | fn add(mut self, (a, b): (F, F)) -> Self::Output { 57 | let (h, r1) = two_product(a, b); 58 | self.s = (self.s + h) + r1; 59 | self 60 | } 61 | } 62 | 63 | impl From for OnlineExactDot 64 | where 65 | OnlineExactSum: SumAccumulator, 66 | { 67 | fn from(x: F) -> Self { 68 | OnlineExactDot { 69 | s: OnlineExactSum::from(x), 70 | } 71 | } 72 | } 73 | 74 | impl Add for OnlineExactDot 75 | where 76 | OnlineExactSum: Add>, 77 | { 78 | type Output = Self; 79 | 80 | #[inline] 81 | fn add(self, rhs: Self) -> Self::Output { 82 | OnlineExactDot { s: self.s + rhs.s } 83 | } 84 | } 85 | 86 | unsafe impl Send for OnlineExactDot where F: Send {} 87 | 
-------------------------------------------------------------------------------- /src/dot/traits.rs: -------------------------------------------------------------------------------- 1 | //! Dot product traits 2 | 3 | use std::ops::Add; 4 | 5 | use num_traits::Zero; 6 | 7 | #[cfg(feature = "parallel")] 8 | use rayon::iter::ParallelIterator; 9 | 10 | #[cfg(feature = "parallel")] 11 | use super::DotConsumer; 12 | 13 | /// Accumulates terms of a dot product 14 | pub trait DotAccumulator: Add<(F, F), Output = Self> + From + Clone { 15 | /// Initial value for an accumulator 16 | fn zero() -> Self 17 | where 18 | F: Zero, 19 | { 20 | Self::from(F::zero()) 21 | } 22 | 23 | /// The dot product of all terms accumulated so far 24 | fn dot(self) -> F; 25 | 26 | /// Absorb the items of an iterator into the accumulator 27 | /// 28 | /// # Examples 29 | /// 30 | /// ``` 31 | /// use accurate::traits::*; 32 | /// use accurate::dot::Dot2; 33 | /// 34 | /// let x = vec![1.0, 2.0, 3.0]; 35 | /// let y = x.clone(); 36 | /// 37 | /// let d = Dot2::zero().absorb(x.into_iter().zip(y.into_iter())); 38 | /// assert_eq!(14.0f64, d.dot()) 39 | /// ``` 40 | fn absorb(self, it: I) -> Self 41 | where 42 | I: IntoIterator, 43 | { 44 | it.into_iter().fold(self, |acc, x| acc + x) 45 | } 46 | } 47 | 48 | /// Calculates the dot product of the items of an iterator 49 | /// 50 | /// # Examples 51 | /// 52 | /// ``` 53 | /// use accurate::traits::*; 54 | /// use accurate::dot::Dot2; 55 | /// 56 | /// let xy = vec![(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)]; 57 | /// let d = xy.dot_with_accumulator::>(); 58 | /// assert_eq!(14.0f64, d); 59 | /// ``` 60 | pub trait DotWithAccumulator { 61 | /// Calculates the dot product of the items of an iterator 62 | fn dot_with_accumulator(self) -> F 63 | where 64 | Acc: DotAccumulator, 65 | F: Zero; 66 | } 67 | 68 | impl DotWithAccumulator for I 69 | where 70 | I: IntoIterator, 71 | { 72 | fn dot_with_accumulator(self) -> F 73 | where 74 | Acc: DotAccumulator, 75 | F: 
Zero, 76 | { 77 | Acc::zero().absorb(self).dot() 78 | } 79 | } 80 | 81 | /// A `DotAccumulator` that can be used in parallel computations 82 | #[cfg(feature = "parallel")] 83 | pub trait ParallelDotAccumulator: 84 | DotAccumulator + Add + Send + Sized 85 | { 86 | /// Turns an accumulator into a consumer 87 | #[inline] 88 | fn into_consumer(self) -> DotConsumer { 89 | DotConsumer(self) 90 | } 91 | } 92 | 93 | #[cfg(feature = "parallel")] 94 | impl ParallelDotAccumulator for Acc where 95 | Acc: DotAccumulator + Add + Send + Sized 96 | { 97 | } 98 | 99 | /// Calculates the dot product of an iterator, possibly in parallel 100 | /// 101 | /// # Examples 102 | /// 103 | /// ``` 104 | /// use rayon::prelude::*; 105 | /// 106 | /// use accurate::traits::*; 107 | /// use accurate::dot::OnlineExactDot; 108 | /// 109 | /// # fn main() { 110 | /// let d = vec![(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)] 111 | /// .par_iter().map(|&x| x) 112 | /// .parallel_dot_with_accumulator::>(); 113 | /// assert_eq!(14.0f64, d); 114 | /// # } 115 | /// ``` 116 | #[cfg(feature = "parallel")] 117 | pub trait ParallelDotWithAccumulator: ParallelIterator 118 | where 119 | F: Send, 120 | { 121 | /// Calculate the dot product of an iterator, possibly in parallel 122 | fn parallel_dot_with_accumulator(self) -> F 123 | where 124 | Acc: ParallelDotAccumulator, 125 | F: Zero, 126 | { 127 | self.drive_unindexed(Acc::zero().into_consumer()).dot() 128 | } 129 | } 130 | 131 | #[cfg(feature = "parallel")] 132 | impl ParallelDotWithAccumulator for T 133 | where 134 | T: ParallelIterator, 135 | F: Zero + Send, 136 | { 137 | } 138 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! A collection of (more or less) accurate floating point algorithms 2 | //! 3 | //! This crate implements several algorithms for floating point summation and dot product. The 4 | //! 
algorithms are realized as types that implement the `SumAccumulator` and `DotAccumulator` 5 | //! traits. 6 | //! 7 | //! # Basic usage 8 | //! 9 | //! Calculating a sum (or a dot product) begins by initializing an accumulator to zero: 10 | //! 11 | //! ``` 12 | //! use accurate::traits::*; // Most functionality is derived from traits in this module 13 | //! use accurate::sum::NaiveSum; // Choose a specific algorithm to perform summation / dot product 14 | //! 15 | //! let s = NaiveSum::::zero(); 16 | //! ``` 17 | //! 18 | //! The accumulator traits are generic over the type of the underlying floating point numbers and 19 | //! the `zero()` constructor is supported if the number type implements the `Zero` trait. 20 | //! Alternatively, the accumulator traits imply that an accumulator can be constructed `from()` an 21 | //! arbitrary value of the number type. 22 | //! 23 | //! ``` 24 | //! # use accurate::traits::*; 25 | //! # use accurate::sum::NaiveSum; 26 | //! let s = NaiveSum::from(42.0f64); 27 | //! ``` 28 | //! 29 | //! The actual calculation is performed via the `Add<F, Output = Self>` trait that is also implied 30 | //! by the `SumAccumulator` trait, where `F` is the type of the floating point numbers. 31 | //! 32 | //! ``` 33 | //! # use accurate::traits::*; 34 | //! use accurate::sum::Sum2; 35 | //! 36 | //! let s = Sum2::zero() + 1.0f64 + 2.0 + 3.0; 37 | //! ``` 38 | //! 39 | //! For dot products, the `DotAccumulator` trait implies `Add<(F, F), Output = Self>` to allow 40 | //! accumulation of the products of pairs into the final result. 41 | //! 42 | //! ``` 43 | //! # use accurate::traits::*; 44 | //! use accurate::dot::NaiveDot; 45 | //! 46 | //! let d = NaiveDot::zero() + (1.0f64, 1.0f64) + (2.0, 2.0) + (3.0, 3.0); 47 | //! ``` 48 | //! 49 | //! Once all of the terms have been accumulated, the result can be evaluated using the `sum()` and 50 | //! `dot()` methods respectively. 51 | //! 52 | //! ``` 53 | //! # use accurate::traits::*; 54 | //! 
# use accurate::sum::Sum2; 55 | //! # use accurate::dot::NaiveDot; 56 | //! let s = Sum2::zero() + 1.0f64 + 2.0 + 3.0; 57 | //! assert_eq!(6.0, s.sum()); 58 | //! 59 | //! let d = NaiveDot::zero() + (1.0f64, 1.0f64) + (2.0, 2.0) + (3.0, 3.0); 60 | //! assert_eq!(14.0, d.dot()); 61 | //! ``` 62 | //! 63 | //! Both `sum()` and `dot()` take their argument by value, because the evaluation of the final 64 | //! result is in some cases a destructive operation on the internal state of the accumulator. 65 | //! However, the evaluation of partial results is supported by `clone()`ing the accumulator. 66 | //! 67 | //! ``` 68 | //! # use accurate::traits::*; 69 | //! # use accurate::sum::Sum2; 70 | //! let s = Sum2::zero() + 1.0f32 + 2.0; 71 | //! assert_eq!(3.0, s.clone().sum()); 72 | //! let s = s + 3.0; 73 | //! assert_eq!(6.0, s.sum()); 74 | //! ``` 75 | //! 76 | //! # Iterator consumption 77 | //! 78 | //! Accumulators can be used in `fold()` operations on iterators as one would expect. 79 | //! 80 | //! ``` 81 | //! # use accurate::traits::*; 82 | //! # use accurate::sum::Sum2; 83 | //! use accurate::dot::Dot2; 84 | //! 85 | //! let s = vec![1.0f32, 2.0, 3.0].into_iter().fold(Sum2::zero(), |acc, x| acc + x); 86 | //! assert_eq!(6.0, s.sum()); 87 | //! 88 | //! let d = vec![1.0f32, 2.0, 3.0].into_iter() 89 | //! .zip(vec![1.0, 2.0, 3.0].into_iter()) 90 | //! .fold(Dot2::zero(), |acc, xy| acc + xy); 91 | //! assert_eq!(14.0, d.dot()); 92 | //! ``` 93 | //! 94 | //! For convenience, the accumulator traits also define `absorb()` methods to absorb values from 95 | //! anything that implements `IntoIterator`. 96 | //! 97 | //! ``` 98 | //! # use accurate::traits::*; 99 | //! # use accurate::sum::Sum2; 100 | //! # use accurate::dot::Dot2; 101 | //! 102 | //! let s = Sum2::zero().absorb(vec![1.0f32, 2.0, 3.0]); 103 | //! assert_eq!(6.0, s.sum()); 104 | //! 105 | //! let d = Dot2::zero().absorb(vec![(1.0f32, 1.0), (2.0, 2.0), (3.0, 3.0)]); 106 | //! 
assert_eq!(14.0, d.dot()); 107 | //! ``` 108 | //! 109 | //! And for even more convenience, suitable iterators are extended by a `sum_with_accumulator()` 110 | //! (and `dot_with_accumulator()`) method that directly evaluates to the result in the floating 111 | //! point number type. 112 | //! 113 | //! ``` 114 | //! # use accurate::traits::*; 115 | //! # use accurate::sum::Sum2; 116 | //! # use accurate::dot::Dot2; 117 | //! 118 | //! let s = Sum2::zero().absorb(vec![1.0f32, 2.0, 3.0]); 119 | //! assert_eq!(6.0f64, vec![1.0, 2.0, 3.0].into_iter().sum_with_accumulator::>()); 120 | //! 121 | //! assert_eq!(14.0f64, vec![(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)].into_iter() 122 | //! .dot_with_accumulator::>()); 123 | //! ``` 124 | //! 125 | #![cfg_attr( 126 | feature = "parallel", 127 | doc = " 128 | # Parallel computation 129 | 130 | If compiled with the `parallel` feature enabled (which is the default) the `rayon` parallel 131 | iterator facilities are used to perform large calculations in parallel. Parallel calculations are 132 | performed through the `parallel_sum_with_accumulator()` and `parallel_dot_with_accumulator()` 133 | extension methods on parallel iterators. 
134 | 135 | ``` 136 | use rayon::prelude::*; 137 | 138 | # use accurate::traits::*; 139 | # use accurate::sum::Sum2; 140 | # fn main() { 141 | let xs = vec![1.0f64; 100_000]; 142 | let s = xs.par_iter().map(|&x| x).parallel_sum_with_accumulator::>(); 143 | assert_eq!(100_000.0, s); 144 | # } 145 | ``` 146 | " 147 | )] 148 | #![deny(missing_docs)] 149 | #![warn(missing_copy_implementations)] 150 | #![warn(missing_debug_implementations)] 151 | #![warn(trivial_casts)] 152 | #![warn(trivial_numeric_casts)] 153 | // This has false positives on #[macro_use], 154 | // see https://github.com/rust-lang/rust/issues/30849 155 | // #![warn(unused_extern_crates)] 156 | #![warn(unused_import_braces)] 157 | #![warn(unused_qualifications)] 158 | #![warn(unused_results)] 159 | #![deny(warnings)] 160 | #![warn(clippy::cast_possible_truncation)] 161 | #![warn(clippy::cast_possible_wrap)] 162 | #![warn(clippy::cast_precision_loss)] 163 | #![warn(clippy::cast_sign_loss)] 164 | #![allow(clippy::doc_markdown)] 165 | #![allow(clippy::many_single_char_names)] 166 | #![warn(clippy::mut_mut)] 167 | #![warn(clippy::mutex_integer)] 168 | #![warn(clippy::non_ascii_literal)] 169 | #![warn(clippy::print_stdout)] 170 | #![warn(clippy::single_match_else)] 171 | #![warn(clippy::string_add)] 172 | #![warn(clippy::string_add_assign)] 173 | #![warn(clippy::unicode_not_nfc)] 174 | #![warn(clippy::unwrap_used)] 175 | #![allow(clippy::suspicious_op_assign_impl)] 176 | 177 | #[cfg(doctest)] 178 | use doc_comment::doctest; 179 | 180 | #[cfg(doctest)] 181 | doctest!("../README.md"); 182 | 183 | pub mod dot; 184 | pub mod sum; 185 | pub mod util; 186 | 187 | /// Includes all traits of this crate 188 | pub mod traits { 189 | #[doc(inline)] 190 | pub use crate::dot::traits::*; 191 | #[doc(inline)] 192 | pub use crate::sum::traits::*; 193 | #[doc(inline)] 194 | pub use crate::util::traits::*; 195 | } 196 | -------------------------------------------------------------------------------- /src/sum.rs: 
-------------------------------------------------------------------------------- 1 | //! Algorithms for summation 2 | 3 | pub mod traits; 4 | 5 | mod cascaded; 6 | mod ifastsum; 7 | mod kahan; 8 | mod naive; 9 | mod onlineexactsum; 10 | mod sumk; 11 | 12 | pub use self::cascaded::{Klein, Neumaier}; 13 | pub use self::ifastsum::i_fast_sum_in_place; 14 | pub use self::kahan::Kahan; 15 | pub use self::naive::NaiveSum; 16 | pub use self::onlineexactsum::OnlineExactSum; 17 | pub use self::sumk::{Sum2, Sum3, Sum4, Sum5, Sum6, Sum7, Sum8, Sum9, SumK}; 18 | 19 | #[cfg(feature = "parallel")] 20 | use num_traits::Zero; 21 | 22 | #[cfg(feature = "parallel")] 23 | use rayon::iter::plumbing::{Consumer, Folder, UnindexedConsumer}; 24 | 25 | #[cfg(feature = "parallel")] 26 | use self::traits::ParallelSumAccumulator; 27 | #[cfg(feature = "parallel")] 28 | use self::traits::SumAccumulator; 29 | #[cfg(feature = "parallel")] 30 | use crate::util::AddReducer; 31 | 32 | /// Adapts a `SumAccumulator` into a `Folder` 33 | #[cfg(feature = "parallel")] 34 | #[derive(Copy, Clone, Debug)] 35 | pub struct SumFolder(Acc); 36 | 37 | #[cfg(feature = "parallel")] 38 | impl Folder for SumFolder 39 | where 40 | Acc: SumAccumulator, 41 | { 42 | type Result = Acc; 43 | 44 | #[inline] 45 | fn consume(self, item: F) -> Self { 46 | SumFolder(self.0 + item) 47 | } 48 | 49 | #[inline] 50 | fn complete(self) -> Self::Result { 51 | self.0 52 | } 53 | 54 | #[inline] 55 | fn full(&self) -> bool { 56 | false 57 | } 58 | } 59 | 60 | /// Adapts a `ParallelSumAccumulator` into a `Consumer` 61 | #[cfg(feature = "parallel")] 62 | #[derive(Copy, Clone, Debug)] 63 | pub struct SumConsumer(Acc); 64 | 65 | #[cfg(feature = "parallel")] 66 | impl Consumer for SumConsumer 67 | where 68 | Acc: ParallelSumAccumulator, 69 | F: Zero + Send, 70 | { 71 | type Folder = SumFolder; 72 | type Reducer = AddReducer; 73 | type Result = Acc; 74 | 75 | #[inline] 76 | fn split_at(self, _index: usize) -> (Self, Self, Self::Reducer) { 77 | 
(self, Acc::zero().into_consumer(), AddReducer) 78 | } 79 | 80 | #[inline] 81 | fn into_folder(self) -> Self::Folder { 82 | SumFolder(self.0) 83 | } 84 | 85 | #[inline] 86 | fn full(&self) -> bool { 87 | false 88 | } 89 | } 90 | 91 | #[cfg(feature = "parallel")] 92 | impl UnindexedConsumer for SumConsumer 93 | where 94 | Acc: ParallelSumAccumulator, 95 | F: Zero + Send, 96 | { 97 | #[inline] 98 | fn split_off_left(&self) -> Self { 99 | Acc::zero().into_consumer() 100 | } 101 | 102 | #[inline] 103 | fn to_reducer(&self) -> Self::Reducer { 104 | AddReducer 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /src/sum/cascaded.rs: -------------------------------------------------------------------------------- 1 | //! Cascaded accumulators 2 | 3 | use std::marker::PhantomData; 4 | use std::ops::{Add, AddAssign}; 5 | 6 | #[cfg(doc)] 7 | use embed_doc_image::embed_doc_image; 8 | 9 | use num_traits::Float; 10 | 11 | use crate::sum::traits::SumAccumulator; 12 | use crate::sum::NaiveSum; 13 | use crate::util::traits::TwoSum; 14 | use crate::util::Neumaier as NeumaierTwoSum; 15 | 16 | #[derive(Copy, Clone, Debug)] 17 | pub struct Cascaded { 18 | s: F, 19 | c: C, 20 | t: PhantomData, 21 | } 22 | 23 | impl SumAccumulator for Cascaded 24 | where 25 | F: Float, 26 | C: SumAccumulator, 27 | T: TwoSum, 28 | { 29 | #[inline] 30 | fn sum(self) -> F { 31 | (self.c + self.s).sum() 32 | } 33 | } 34 | 35 | impl Add for Cascaded 36 | where 37 | Cascaded: AddAssign, 38 | { 39 | type Output = Self; 40 | 41 | #[inline] 42 | fn add(mut self, rhs: F) -> Self::Output { 43 | self += rhs; 44 | self 45 | } 46 | } 47 | 48 | impl From for Cascaded 49 | where 50 | F: Float, 51 | C: SumAccumulator, 52 | { 53 | fn from(x: F) -> Self { 54 | Cascaded { 55 | s: x, 56 | c: C::zero(), 57 | t: PhantomData, 58 | } 59 | } 60 | } 61 | 62 | impl Add for Cascaded 63 | where 64 | F: Float, 65 | C: SumAccumulator, 66 | C::Output: Add, 67 | T: TwoSum, 68 | { 69 | 
type Output = Self; 70 | 71 | #[inline] 72 | fn add(self, rhs: Self) -> Self::Output { 73 | let (s, c) = T::two_sum(self.s, rhs.s); 74 | Cascaded { 75 | s, 76 | c: (self.c + c) + rhs.c, 77 | t: PhantomData, 78 | } 79 | } 80 | } 81 | 82 | unsafe impl Send for Cascaded 83 | where 84 | F: Send, 85 | C: Send, 86 | { 87 | } 88 | 89 | impl AddAssign for Cascaded 90 | where 91 | F: Float, 92 | C: SumAccumulator, 93 | T: TwoSum, 94 | { 95 | #[inline] 96 | fn add_assign(&mut self, rhs: F) { 97 | let (x, y) = T::two_sum(self.s, rhs); 98 | self.s = x; 99 | self.c += y; 100 | } 101 | } 102 | 103 | /// Neumaier summation 104 | /// 105 | /// ![][Kahan] 106 | /// 107 | /// # Examples 108 | /// 109 | /// ``` 110 | /// use accurate::traits::*; 111 | /// use accurate::sum::Neumaier; 112 | /// 113 | /// let s = Neumaier::zero() + 1.0 + 2.0 + 3.0; 114 | /// assert_eq!(6.0f64, s.sum()); 115 | /// ``` 116 | /// 117 | /// # References 118 | /// 119 | /// Based on [Neumaier 74](https://doi.org/10.1002%2Fzamm.19740540106) 120 | #[cfg_attr(doc, embed_doc_image("Kahan", "images/Kahan.svg"))] 121 | pub type Neumaier = Cascaded, NeumaierTwoSum>; 122 | 123 | /// Klein summation 124 | /// 125 | /// ![][Kahan] 126 | /// 127 | /// # Examples 128 | /// 129 | /// ``` 130 | /// use accurate::traits::*; 131 | /// use accurate::sum::Klein; 132 | /// 133 | /// let s = Klein::zero() + 1.0 + 2.0 + 3.0; 134 | /// assert_eq!(6.0f64, s.sum()); 135 | /// ``` 136 | /// 137 | /// # References 138 | /// 139 | /// Based on [Klein 06](https://doi.org/10.1007%2Fs00607-005-0139-x) 140 | #[cfg_attr(doc, embed_doc_image("Kahan", "images/Kahan.svg"))] 141 | pub type Klein = Cascaded, NeumaierTwoSum>, NeumaierTwoSum>; 142 | -------------------------------------------------------------------------------- /src/sum/ifastsum.rs: -------------------------------------------------------------------------------- 1 | //! 
The `iFastSum` algorithm 2 | 3 | use num_traits::Float; 4 | 5 | use crate::util::traits::{HalfUlp, Round3}; 6 | use crate::util::{round3, two_sum}; 7 | 8 | /// Calculates the correctly rounded sum of numbers in a slice 9 | pub trait IFastSum: Float + HalfUlp + Round3 {} 10 | 11 | impl IFastSum for F where F: Float + HalfUlp + Round3 {} 12 | 13 | /// Calculates the correctly rounded sum of numbers in a slice 14 | /// 15 | /// This algorithm works in place by mutating the contents of the slice. It is used by 16 | /// `OnlineExactSum`. 17 | /// 18 | /// # References 19 | /// 20 | /// Based on [Zhu and Hayes 09](http://dx.doi.org/10.1137/070710020) 21 | pub fn i_fast_sum_in_place(xs: &mut [F]) -> F 22 | where 23 | F: IFastSum, 24 | { 25 | let mut n = xs.len(); 26 | i_fast_sum_in_place_aux(xs, &mut n, true) 27 | } 28 | 29 | fn i_fast_sum_in_place_aux(xs: &mut [F], n: &mut usize, recurse: bool) -> F 30 | where 31 | F: IFastSum, 32 | { 33 | // Step 1 34 | let mut s = F::zero(); 35 | 36 | // Step 2 37 | // The following accesses are guaranteed to be inside bounds, because: 38 | debug_assert!(*n <= xs.len()); 39 | for i in 0..*n { 40 | let x = unsafe { xs.get_unchecked_mut(i) }; 41 | let (a, b) = two_sum(s, *x); 42 | s = a; 43 | *x = b; 44 | } 45 | 46 | // Step 3 47 | loop { 48 | // Step 3(1) 49 | let mut count: usize = 0; // slices are indexed from 0 50 | let mut st = F::zero(); 51 | let mut sm = F::zero(); 52 | 53 | // Step 3(2) 54 | // The following accesses are guaranteed to be inside bounds, because: 55 | debug_assert!(*n <= xs.len()); 56 | for i in 0..*n { 57 | // Step 3(2)(a) 58 | let (a, b) = two_sum(st, unsafe { *xs.get_unchecked(i) }); 59 | st = a; 60 | // Step 3(2)(b) 61 | if b != F::zero() { 62 | // The following access is guaranteed to be inside bounds, because: 63 | debug_assert!(count < xs.len()); 64 | unsafe { 65 | *xs.get_unchecked_mut(count) = b; 66 | } 67 | 68 | // Step 3(2)(b)(i) 69 | // The following addition is guaranteed not to overflow, because: 70 | 
debug_assert!(count < usize::MAX); 71 | // and thus: 72 | debug_assert!(count.checked_add(1).is_some()); 73 | count += 1; 74 | 75 | // Step 3(2)(b)(ii) 76 | sm = sm.max(Float::abs(st)); 77 | } 78 | } 79 | 80 | // Step 3(3) 81 | let em = F::from(count).expect("count not representable as floating point number") 82 | * sm.half_ulp(); 83 | 84 | // Step 3(4) 85 | let (a, b) = two_sum(s, st); 86 | s = a; 87 | st = b; 88 | // The following access is guaranteed to be inside bounds, because: 89 | debug_assert!(count < xs.len()); 90 | unsafe { 91 | *xs.get_unchecked_mut(count) = st; 92 | } 93 | // The following addition is guaranteed not to overflow, because: 94 | debug_assert!(count < usize::MAX); 95 | // and thus: 96 | debug_assert!(count.checked_add(1).is_some()); 97 | *n = count + 1; 98 | 99 | // Step 3(5) 100 | if (em == F::zero()) || (em < s.half_ulp()) { 101 | // Step 3(5)(a) 102 | if !recurse { 103 | return s; 104 | } 105 | 106 | // Step 3(5)(b) 107 | let (w1, e1) = two_sum(st, em); 108 | // Step 3(5)(c) 109 | let (w2, e2) = two_sum(st, -em); 110 | 111 | // Step 3(5)(d) 112 | if (w1 + s != s) 113 | || (w2 + s != s) 114 | || (round3(s, w1, e1) != s) 115 | || (round3(s, w2, e2) != s) 116 | { 117 | // Step 3(5)(d)(i) 118 | let mut s1 = i_fast_sum_in_place_aux(xs, n, false); 119 | 120 | // Step 3(5)(d)(ii) 121 | let (a, b) = two_sum(s, s1); 122 | s = a; 123 | s1 = b; 124 | 125 | // Step 3(5)(d)(iii) 126 | let s2 = i_fast_sum_in_place_aux(xs, n, false); 127 | 128 | // Step 3(5)(d)(iv) 129 | s = round3(s, s1, s2); 130 | } 131 | 132 | // Step 3(5)(e) 133 | return s; 134 | } 135 | } 136 | } 137 | 138 | #[cfg(test)] 139 | mod test { 140 | use super::i_fast_sum_in_place; 141 | 142 | #[test] 143 | fn issue_5() { 144 | let a: f32 = 4194304.0; 145 | let b: f32 = 4194304.5; 146 | let s = i_fast_sum_in_place(&mut [a, b]); 147 | assert_eq!(s, 8388608.0); 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /src/sum/kahan.rs: 
-------------------------------------------------------------------------------- 1 | //! Kahan summation 2 | 3 | use std::ops::{Add, AddAssign}; 4 | 5 | #[cfg(doc)] 6 | use embed_doc_image::embed_doc_image; 7 | 8 | use num_traits::Float; 9 | 10 | use crate::sum::traits::SumAccumulator; 11 | 12 | /// Kahan summation 13 | /// 14 | /// ![][Kahan] 15 | /// 16 | /// # Examples 17 | /// 18 | /// ``` 19 | /// use accurate::traits::*; 20 | /// use accurate::sum::Kahan; 21 | /// 22 | /// let s = Kahan::zero() + 1.0 + 2.0 + 3.0; 23 | /// assert_eq!(6.0f64, s.sum()); 24 | /// ``` 25 | /// 26 | /// # References 27 | /// 28 | /// Based on [Kahan 65](https://doi.org/10.1145%2F363707.363723) 29 | #[cfg_attr(doc, embed_doc_image("Kahan", "images/Kahan.svg"))] 30 | #[derive(Copy, Clone, Debug)] 31 | pub struct Kahan { 32 | sum: F, 33 | c: F, 34 | } 35 | 36 | impl SumAccumulator for Kahan 37 | where 38 | F: Float, 39 | { 40 | #[inline] 41 | fn sum(self) -> F { 42 | self.sum 43 | } 44 | } 45 | 46 | impl Add for Kahan 47 | where 48 | Kahan: AddAssign, 49 | { 50 | type Output = Self; 51 | 52 | #[inline] 53 | fn add(mut self, rhs: F) -> Self::Output { 54 | self += rhs; 55 | self 56 | } 57 | } 58 | 59 | impl From for Kahan 60 | where 61 | F: Float, 62 | { 63 | fn from(x: F) -> Self { 64 | Kahan { 65 | sum: x, 66 | c: F::zero(), 67 | } 68 | } 69 | } 70 | 71 | impl Add for Kahan 72 | where 73 | F: Float, 74 | { 75 | type Output = Self; 76 | 77 | #[inline] 78 | fn add(mut self, rhs: Self) -> Self::Output { 79 | self += rhs.sum; 80 | self += rhs.c; 81 | self 82 | } 83 | } 84 | 85 | unsafe impl Send for Kahan where F: Send {} 86 | 87 | impl AddAssign for Kahan 88 | where 89 | F: Float, 90 | { 91 | #[inline] 92 | fn add_assign(&mut self, rhs: F) { 93 | let t = self.sum; 94 | let y = rhs + self.c; 95 | self.sum = t + y; 96 | self.c = (t - self.sum) + y; 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/sum/naive.rs: 
-------------------------------------------------------------------------------- 1 | //! Naive floating point summation 2 | 3 | use std::ops::{Add, AddAssign}; 4 | 5 | #[cfg(doc)] 6 | use embed_doc_image::embed_doc_image; 7 | 8 | use num_traits::Float; 9 | 10 | use super::traits::SumAccumulator; 11 | 12 | /// Naive floating point summation 13 | /// 14 | /// ![][NaiveSum] 15 | /// 16 | /// # Examples 17 | /// 18 | /// ``` 19 | /// use accurate::traits::*; 20 | /// use accurate::sum::NaiveSum; 21 | /// 22 | /// let s = NaiveSum::zero() + 1.0 + 2.0 + 3.0; 23 | /// assert_eq!(6.0f64, s.sum()); 24 | /// ``` 25 | #[cfg_attr(doc, embed_doc_image("NaiveSum", "images/NaiveSum.svg"))] 26 | #[derive(Copy, Clone, Debug)] 27 | pub struct NaiveSum(F); 28 | 29 | impl SumAccumulator for NaiveSum 30 | where 31 | F: Float, 32 | { 33 | #[inline] 34 | fn sum(self) -> F { 35 | self.0 36 | } 37 | } 38 | 39 | impl Add for NaiveSum 40 | where 41 | NaiveSum: AddAssign, 42 | { 43 | type Output = Self; 44 | 45 | #[inline] 46 | fn add(mut self, rhs: F) -> Self::Output { 47 | self += rhs; 48 | self 49 | } 50 | } 51 | 52 | impl From for NaiveSum 53 | where 54 | F: Float, 55 | { 56 | fn from(x: F) -> Self { 57 | NaiveSum(x) 58 | } 59 | } 60 | 61 | impl Add for NaiveSum 62 | where 63 | F: Float, 64 | { 65 | type Output = Self; 66 | 67 | #[inline] 68 | fn add(self, rhs: Self) -> Self::Output { 69 | NaiveSum(self.0 + rhs.0) 70 | } 71 | } 72 | 73 | unsafe impl Send for NaiveSum where F: Send {} 74 | 75 | impl AddAssign for NaiveSum 76 | where 77 | F: Float, 78 | { 79 | #[inline] 80 | fn add_assign(&mut self, rhs: F) { 81 | self.0 = self.0 + rhs; 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/sum/onlineexactsum.rs: -------------------------------------------------------------------------------- 1 | //! 
The `OnlineExactSum` algorithm 2 | 3 | use std::ops::{Add, AddAssign}; 4 | 5 | #[cfg(doc)] 6 | use embed_doc_image::embed_doc_image; 7 | 8 | use num_traits::Float; 9 | 10 | use super::i_fast_sum_in_place; 11 | use super::traits::{IFastSum, SumAccumulator}; 12 | use crate::util::traits::{FloatFormat, RawExponent}; 13 | use crate::util::two_sum; 14 | 15 | /// Calculates a sum using separate accumulators for each possible exponent 16 | /// 17 | /// ![][OnlineExactSum] 18 | /// 19 | /// # Examples 20 | /// 21 | /// ``` 22 | /// use accurate::traits::*; 23 | /// use accurate::sum::OnlineExactSum; 24 | /// 25 | /// let s = OnlineExactSum::zero() + 1.0 + 2.0 + 3.0; 26 | /// assert_eq!(6.0f64, s.sum()); 27 | /// ``` 28 | /// 29 | /// # References 30 | /// 31 | /// Based on [Zhu and Hayes 10](http://dx.doi.org/10.1145/1824801.1824815) 32 | #[cfg_attr(doc, embed_doc_image("OnlineExactSum", "images/OnlineExactSum.svg"))] 33 | #[derive(Clone, Debug)] 34 | pub struct OnlineExactSum { 35 | i: usize, 36 | a1: Box<[F]>, 37 | a2: Box<[F]>, 38 | } 39 | 40 | impl OnlineExactSum 41 | where 42 | F: Float + FloatFormat + RawExponent, 43 | { 44 | fn new() -> Self { 45 | // Steps 1, 2, 3 46 | OnlineExactSum { 47 | i: 0, 48 | a1: vec![F::zero(); F::base_pow_exponent_digits()].into_boxed_slice(), 49 | a2: vec![F::zero(); F::base_pow_exponent_digits()].into_boxed_slice(), 50 | } 51 | } 52 | 53 | #[inline(never)] 54 | fn compact(&mut self) { 55 | // Step 4(6)(a) 56 | let mut b1v = vec![F::zero(); F::base_pow_exponent_digits()].into_boxed_slice(); 57 | let mut b2v = vec![F::zero(); F::base_pow_exponent_digits()].into_boxed_slice(); 58 | 59 | // Step 4(6)(b) 60 | for &y in self.a1.iter().chain(self.a2.iter()) { 61 | // Step 4(6)(b)(i) 62 | let j = y.raw_exponent(); 63 | // These accesses are guaranteed to be within bounds, because: 64 | debug_assert_eq!(b1v.len(), F::base_pow_exponent_digits()); 65 | debug_assert_eq!(b2v.len(), F::base_pow_exponent_digits()); 66 | debug_assert!(j < 
F::base_pow_exponent_digits()); 67 | let b1 = unsafe { b1v.get_unchecked_mut(j) }; 68 | let b2 = unsafe { b2v.get_unchecked_mut(j) }; 69 | 70 | // Step 4(6)(b)(ii) 71 | let (b, e) = two_sum(*b1, y); 72 | *b1 = b; 73 | 74 | // Step 4(6)(b)(iii) 75 | *b2 = *b2 + e; 76 | } 77 | 78 | // Step 4(6)(c) 79 | self.a1 = b1v; 80 | self.a2 = b2v; 81 | 82 | // Step 4(6)(d) 83 | self.i = 2 * F::base_pow_exponent_digits(); 84 | } 85 | } 86 | 87 | impl SumAccumulator for OnlineExactSum 88 | where 89 | F: Float + IFastSum + FloatFormat + RawExponent, 90 | { 91 | fn zero() -> Self { 92 | Self::new() 93 | } 94 | 95 | #[inline] 96 | fn sum(self) -> F { 97 | // Step 5 98 | let mut a = self.a1.into_vec(); 99 | let mut b = self.a2.into_vec(); 100 | a.append(&mut b); 101 | a.retain(|&x| x != F::zero()); 102 | 103 | // Step 6 104 | i_fast_sum_in_place(&mut a[..]) 105 | } 106 | } 107 | 108 | impl Add for OnlineExactSum 109 | where 110 | OnlineExactSum: AddAssign, 111 | { 112 | type Output = Self; 113 | 114 | #[inline] 115 | fn add(mut self, rhs: F) -> Self::Output { 116 | self += rhs; 117 | self 118 | } 119 | } 120 | 121 | impl From for OnlineExactSum 122 | where 123 | F: Float + FloatFormat + RawExponent, 124 | { 125 | fn from(x: F) -> Self { 126 | Self::new() + x 127 | } 128 | } 129 | 130 | impl Add for OnlineExactSum 131 | where 132 | F: Float + IFastSum + FloatFormat + RawExponent, 133 | { 134 | type Output = Self; 135 | 136 | #[inline] 137 | fn add(self, rhs: Self) -> Self::Output { 138 | self.absorb(rhs.a1.iter().cloned().chain(rhs.a2.iter().cloned())) 139 | } 140 | } 141 | 142 | unsafe impl Send for OnlineExactSum where F: Send {} 143 | 144 | impl AddAssign for OnlineExactSum 145 | where 146 | F: Float + FloatFormat + RawExponent, 147 | { 148 | #[inline] 149 | fn add_assign(&mut self, rhs: F) { 150 | // Step 4(2) 151 | { 152 | let j = rhs.raw_exponent(); 153 | // These accesses are guaranteed to be within bounds, because: 154 | debug_assert_eq!(self.a1.len(), 
F::base_pow_exponent_digits()); 155 | debug_assert_eq!(self.a2.len(), F::base_pow_exponent_digits()); 156 | debug_assert!(j < F::base_pow_exponent_digits()); 157 | let a1 = unsafe { self.a1.get_unchecked_mut(j) }; 158 | let a2 = unsafe { self.a2.get_unchecked_mut(j) }; 159 | 160 | // Step 4(3) 161 | let (a, e) = two_sum(*a1, rhs); 162 | *a1 = a; 163 | 164 | // Step 4(4) 165 | *a2 = *a2 + e; 166 | } 167 | 168 | // Step 4(5) 169 | // This addition is guaranteed not to overflow because the next step ascertains that (at 170 | // this point): 171 | debug_assert!(self.i < F::base_pow_significand_digits_half()); 172 | // and (for `f32` and `f64`) we have: 173 | debug_assert!(F::base_pow_significand_digits_half() < usize::MAX); 174 | // thus we can assume: 175 | debug_assert!(self.i.checked_add(1).is_some()); 176 | self.i += 1; 177 | 178 | // Step 4(6) 179 | if self.i >= F::base_pow_significand_digits_half() { 180 | self.compact(); 181 | } 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /src/sum/sumk.rs: -------------------------------------------------------------------------------- 1 | //! The `SumK` algorithm 2 | 3 | #[cfg(doc)] 4 | use embed_doc_image::embed_doc_image; 5 | 6 | use crate::sum::cascaded::Cascaded; 7 | use crate::sum::NaiveSum; 8 | use crate::util::Knuth; 9 | 10 | /// Calculates a sum using cascaded accumulators for the remainder terms 11 | /// 12 | /// See also `Sum2`... `Sum9`. 
13 | /// 14 | /// ![][SumK] 15 | /// 16 | /// # References 17 | /// 18 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 19 | #[cfg_attr(doc, embed_doc_image("SumK", "images/SumK.svg"))] 20 | pub type SumK = Cascaded; 21 | 22 | /// `SumK` with two cascaded accumulators 23 | /// 24 | /// ![][SumK] 25 | /// 26 | /// # Examples 27 | /// 28 | /// ``` 29 | /// use accurate::traits::*; 30 | /// use accurate::sum::Sum2; 31 | /// 32 | /// let s = Sum2::zero() + 1.0 + 2.0 + 3.0; 33 | /// assert_eq!(6.0f64, s.sum()); 34 | /// ``` 35 | /// 36 | /// # References 37 | /// 38 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 39 | #[cfg_attr(doc, embed_doc_image("SumK", "images/SumK.svg"))] 40 | pub type Sum2 = SumK>; 41 | 42 | /// `SumK` with three cascaded accumulators 43 | /// 44 | /// ![][SumK] 45 | /// 46 | /// # Examples 47 | /// 48 | /// ``` 49 | /// use accurate::traits::*; 50 | /// use accurate::sum::Sum3; 51 | /// 52 | /// let s = Sum3::zero() + 1.0 + 2.0 + 3.0; 53 | /// assert_eq!(6.0f64, s.sum()); 54 | /// ``` 55 | /// 56 | /// # References 57 | /// 58 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 59 | #[cfg_attr(doc, embed_doc_image("SumK", "images/SumK.svg"))] 60 | pub type Sum3 = SumK>; 61 | 62 | /// `SumK` with four cascaded accumulators 63 | /// 64 | /// ![][SumK] 65 | /// 66 | /// # Examples 67 | /// 68 | /// ``` 69 | /// use accurate::traits::*; 70 | /// use accurate::sum::Sum4; 71 | /// 72 | /// let s = Sum4::zero() + 1.0 + 2.0 + 3.0; 73 | /// assert_eq!(6.0f64, s.sum()); 74 | /// ``` 75 | /// 76 | /// # References 77 | /// 78 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 79 | #[cfg_attr(doc, embed_doc_image("SumK", "images/SumK.svg"))] 80 | pub type Sum4 = SumK>; 81 | 82 | /// `SumK` with five cascaded accumulators 83 | /// 84 | /// ![][SumK] 85 | /// 86 | /// # Examples 87 | /// 88 | /// ``` 89 | /// use accurate::traits::*; 90 | /// 
use accurate::sum::Sum5; 91 | /// 92 | /// let s = Sum5::zero() + 1.0 + 2.0 + 3.0; 93 | /// assert_eq!(6.0f64, s.sum()); 94 | /// ``` 95 | /// 96 | /// # References 97 | /// 98 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 99 | #[cfg_attr(doc, embed_doc_image("SumK", "images/SumK.svg"))] 100 | pub type Sum5 = SumK>; 101 | 102 | /// `SumK` with six cascaded accumulators 103 | /// 104 | /// ![][SumK] 105 | /// 106 | /// # Examples 107 | /// 108 | /// ``` 109 | /// use accurate::traits::*; 110 | /// use accurate::sum::Sum6; 111 | /// 112 | /// let s = Sum6::zero() + 1.0 + 2.0 + 3.0; 113 | /// assert_eq!(6.0f64, s.sum()); 114 | /// ``` 115 | /// 116 | /// # References 117 | /// 118 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 119 | #[cfg_attr(doc, embed_doc_image("SumK", "images/SumK.svg"))] 120 | pub type Sum6 = SumK>; 121 | 122 | /// `SumK` with seven cascaded accumulators 123 | /// 124 | /// ![][SumK] 125 | /// 126 | /// # Examples 127 | /// 128 | /// ``` 129 | /// use accurate::traits::*; 130 | /// use accurate::sum::Sum7; 131 | /// 132 | /// let s = Sum7::zero() + 1.0 + 2.0 + 3.0; 133 | /// assert_eq!(6.0f64, s.sum()); 134 | /// ``` 135 | /// 136 | /// # References 137 | /// 138 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 139 | #[cfg_attr(doc, embed_doc_image("SumK", "images/SumK.svg"))] 140 | pub type Sum7 = SumK>; 141 | 142 | /// `SumK` with eight cascaded accumulators 143 | /// 144 | /// ![][SumK] 145 | /// 146 | /// # Examples 147 | /// 148 | /// ``` 149 | /// use accurate::traits::*; 150 | /// use accurate::sum::Sum8; 151 | /// 152 | /// let s = Sum8::zero() + 1.0 + 2.0 + 3.0; 153 | /// assert_eq!(6.0f64, s.sum()); 154 | /// ``` 155 | /// 156 | /// # References 157 | /// 158 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 159 | #[cfg_attr(doc, embed_doc_image("SumK", "images/SumK.svg"))] 160 | pub type Sum8 = SumK>; 161 | 162 | 
/// `SumK` with nine cascaded accumulators 163 | /// 164 | /// ![][SumK] 165 | /// 166 | /// # Examples 167 | /// 168 | /// ``` 169 | /// use accurate::traits::*; 170 | /// use accurate::sum::Sum9; 171 | /// 172 | /// let s = Sum9::zero() + 1.0 + 2.0 + 3.0; 173 | /// assert_eq!(6.0f64, s.sum()); 174 | /// ``` 175 | /// 176 | /// # References 177 | /// 178 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 179 | #[cfg_attr(doc, embed_doc_image("SumK", "images/SumK.svg"))] 180 | pub type Sum9 = SumK>; 181 | -------------------------------------------------------------------------------- /src/sum/traits.rs: -------------------------------------------------------------------------------- 1 | //! Summation traits 2 | 3 | use std::ops::{Add, AddAssign}; 4 | 5 | use num_traits::Zero; 6 | 7 | #[cfg(feature = "parallel")] 8 | use rayon::iter::ParallelIterator; 9 | 10 | pub use crate::sum::ifastsum::IFastSum; 11 | 12 | #[cfg(feature = "parallel")] 13 | use crate::sum::SumConsumer; 14 | 15 | /// Accumulates terms of a sum 16 | pub trait SumAccumulator: Add + AddAssign + From + Clone { 17 | /// Initial value for an accumulator 18 | fn zero() -> Self 19 | where 20 | F: Zero, 21 | { 22 | Self::from(F::zero()) 23 | } 24 | 25 | /// The sum of all terms accumulated so far 26 | fn sum(self) -> F; 27 | 28 | /// Absorb the items of an iterator into the accumulator 29 | /// 30 | /// # Examples 31 | /// 32 | /// ``` 33 | /// use accurate::traits::*; 34 | /// use accurate::sum::Sum2; 35 | /// 36 | /// let s = Sum2::zero().absorb(vec![1.0, 2.0, 3.0]); 37 | /// assert_eq!(6.0f64, s.sum()) 38 | /// ``` 39 | fn absorb(self, it: I) -> Self 40 | where 41 | I: IntoIterator, 42 | { 43 | it.into_iter().fold(self, |acc, x| acc + x) 44 | } 45 | } 46 | 47 | /// Sums the items of an iterator 48 | /// 49 | /// # Examples 50 | /// 51 | /// ``` 52 | /// use accurate::traits::*; 53 | /// use accurate::sum::Sum2; 54 | /// 55 | /// let s = vec![1.0, 2.0, 
3.0].sum_with_accumulator::>(); 56 | /// assert_eq!(6.0f64, s); 57 | /// ``` 58 | pub trait SumWithAccumulator { 59 | /// Sums the items of an iterator 60 | fn sum_with_accumulator(self) -> F 61 | where 62 | Acc: SumAccumulator, 63 | F: Zero; 64 | } 65 | 66 | impl SumWithAccumulator for I 67 | where 68 | I: IntoIterator, 69 | { 70 | fn sum_with_accumulator(self) -> F 71 | where 72 | Acc: SumAccumulator, 73 | F: Zero, 74 | { 75 | Acc::zero().absorb(self).sum() 76 | } 77 | } 78 | 79 | /// A `SumAccumulator` that can be used in parallel computations 80 | #[cfg(feature = "parallel")] 81 | pub trait ParallelSumAccumulator: 82 | SumAccumulator + Add + Send + Sized 83 | { 84 | /// Turns an accumulator into a consumer 85 | #[inline] 86 | fn into_consumer(self) -> SumConsumer { 87 | SumConsumer(self) 88 | } 89 | } 90 | 91 | #[cfg(feature = "parallel")] 92 | impl ParallelSumAccumulator for Acc where 93 | Acc: SumAccumulator + Add + Send + Sized 94 | { 95 | } 96 | 97 | /// Sums the items of an iterator, possibly in parallel 98 | /// 99 | /// # Examples 100 | /// 101 | /// ``` 102 | /// use rayon::prelude::*; 103 | /// 104 | /// use accurate::traits::*; 105 | /// use accurate::sum::OnlineExactSum; 106 | /// 107 | /// # fn main() { 108 | /// let s = vec![1.0, 2.0, 3.0].par_iter().map(|&x| x) 109 | /// .parallel_sum_with_accumulator::>(); 110 | /// assert_eq!(6.0f64, s); 111 | /// # } 112 | /// ``` 113 | #[cfg(feature = "parallel")] 114 | pub trait ParallelSumWithAccumulator: ParallelIterator 115 | where 116 | F: Send, 117 | { 118 | /// Sums the items of an iterator, possibly in parallel 119 | fn parallel_sum_with_accumulator(self) -> F 120 | where 121 | Acc: ParallelSumAccumulator, 122 | F: Zero, 123 | { 124 | self.drive_unindexed(Acc::zero().into_consumer()).sum() 125 | } 126 | } 127 | 128 | #[cfg(feature = "parallel")] 129 | impl ParallelSumWithAccumulator for T 130 | where 131 | T: ParallelIterator, 132 | F: Zero + Send, 133 | { 134 | } 135 | 
-------------------------------------------------------------------------------- /src/util.rs: -------------------------------------------------------------------------------- 1 | //! Common infrastructure 2 | 3 | use cfg_if::cfg_if; 4 | 5 | use num_traits::Float; 6 | 7 | #[cfg(feature = "parallel")] 8 | use std::ops::Add; 9 | 10 | #[cfg(feature = "parallel")] 11 | use rayon::iter::plumbing::Reducer; 12 | 13 | pub mod traits; 14 | 15 | use self::traits::{Round3, TwoProduct, TwoSum}; 16 | 17 | /// Dekker's two term sum transformation 18 | #[derive(Copy, Clone, Debug)] 19 | pub struct Dekker; 20 | 21 | impl TwoSum for Dekker 22 | where 23 | F: Float, 24 | { 25 | #[inline] 26 | fn two_sum(a: F, b: F) -> (F, F) { 27 | let x = a + b; 28 | let y = (a - x) + b; 29 | (x, y) 30 | } 31 | } 32 | 33 | /// Neumaier's error-free two term sum transformation 34 | #[derive(Copy, Clone, Debug)] 35 | pub struct Neumaier; 36 | 37 | impl TwoSum for Neumaier 38 | where 39 | F: Float, 40 | { 41 | #[inline] 42 | fn two_sum(a: F, b: F) -> (F, F) { 43 | if a.abs() >= b.abs() { 44 | Dekker::two_sum(a, b) 45 | } else { 46 | Dekker::two_sum(b, a) 47 | } 48 | } 49 | } 50 | 51 | /// Knuth's branch-less Error-free transformations of two term sums 52 | /// 53 | /// # References 54 | /// 55 | /// From Knuth's AoCP, Volume 2: Seminumerical Algorithms 56 | #[derive(Copy, Clone, Debug)] 57 | pub struct Knuth; 58 | 59 | impl TwoSum for Knuth 60 | where 61 | F: Float, 62 | { 63 | #[inline] 64 | fn two_sum(a: F, b: F) -> (F, F) { 65 | let x = a + b; 66 | let z = x - a; 67 | let y = (a - (x - z)) + (b - z); 68 | (x, y) 69 | } 70 | } 71 | 72 | /// Knuth's branch-less Error-free transformations of two term sums 73 | pub fn two_sum(a: F, b: F) -> (F, F) 74 | where 75 | F: Float, 76 | { 77 | Knuth::two_sum(a, b) 78 | } 79 | 80 | cfg_if! 
{ 81 | if #[cfg(feature = "fma")] { 82 | /// Product transformation 83 | /// 84 | /// Transforms a product `a * b` into the pair `(x, y)` so that 85 | /// 86 | /// ```not_rust 87 | /// x = fl(a * b) 88 | /// ``` 89 | /// 90 | /// is the product of `a` and `b` with floating point rounding applied and 91 | /// 92 | /// ```not_rust 93 | /// y = a * b - x 94 | /// ``` 95 | /// 96 | /// is the remainder of the product. 97 | /// 98 | /// # References 99 | /// 100 | /// Based on [Ogita, Rump and Oishi 05](http://dx.doi.org/10.1137/030601818) 101 | #[inline] 102 | pub fn two_product(a: F, b: F) -> (F, F) 103 | where F: TwoProduct 104 | { 105 | let x = a * b; 106 | let y = a.mul_add(b, -x); 107 | (x, y) 108 | } 109 | } else { 110 | /// Product transformation 111 | /// 112 | /// Transforms a product `a * b` into the pair `(x, y)` so that 113 | /// 114 | /// ```not_rust 115 | /// x = fl(a * b) 116 | /// ``` 117 | /// 118 | /// is the product of `a` and `b` with floating point rounding applied and 119 | /// 120 | /// ```not_rust 121 | /// y = a * b - x 122 | /// ``` 123 | /// 124 | /// is the remainder of the product. 
125 | /// 126 | /// # References 127 | /// 128 | /// Based on [Dekker 71](http://dx.doi.org/10.1007/BF01397083) 129 | #[inline] 130 | pub fn two_product(x: F, y: F) -> (F, F) 131 | where F: TwoProduct 132 | { 133 | let a = x * y; 134 | let (x1, x2) = x.split(); 135 | let (y1, y2) = y.split(); 136 | let b = x2 * y2 - (((a - x1 * y1) - x2 * y1) - x1 * y2); 137 | (a, b) 138 | } 139 | } 140 | } 141 | 142 | /// Correctly rounded sum of three non-overlapping numbers 143 | /// 144 | /// Calculates the correctly rounded sum of three numbers `s0`, `s1` and `s2` which are 145 | /// non-overlapping, i.e.: 146 | /// 147 | /// ```not_rust 148 | /// s0.abs() > s1.abs() > s2.abs() 149 | /// fl(s0 + s1) = s0 150 | /// fl(s1 + s2) = s1 151 | /// ``` 152 | /// 153 | /// # References 154 | /// 155 | /// Based on [Zhu and Hayes 09](http://dx.doi.org/10.1137/070710020) 156 | #[inline] 157 | pub fn round3(s0: F, s1: F, s2: F) -> F 158 | where 159 | F: Round3, 160 | { 161 | debug_assert!(s0 == s0 + s1); 162 | debug_assert!(s1 == s1 + s2); 163 | if s1.has_half_ulp_form() && s1.sign() == s2.sign() { 164 | s0 + if s1.is_sign_positive() { 165 | s1.next() 166 | } else { 167 | s1.prev() 168 | } 169 | } else { 170 | s0 171 | } 172 | } 173 | 174 | /// Reduce two parallel results using `Add` 175 | #[cfg(feature = "parallel")] 176 | #[derive(Copy, Clone, Debug)] 177 | pub struct AddReducer; 178 | 179 | #[cfg(feature = "parallel")] 180 | impl Reducer for AddReducer 181 | where 182 | Acc: Add, 183 | { 184 | #[inline] 185 | fn reduce(self, left: Acc, right: Acc) -> Acc { 186 | left + right 187 | } 188 | } 189 | -------------------------------------------------------------------------------- /src/util/traits.rs: -------------------------------------------------------------------------------- 1 | //! 
Common traits 2 | 3 | use cfg_if::cfg_if; 4 | 5 | use ieee754::Ieee754; 6 | 7 | use num_traits::{Float, PrimInt, ToPrimitive, Zero}; 8 | 9 | /// Error-free transformations of two term sums 10 | pub trait TwoSum: Clone { 11 | /// Sum transformation 12 | /// 13 | /// Transforms a sum `a + b` into the pair `(x, y)` where 14 | /// 15 | /// ```not_rust 16 | /// x = fl(a + b) 17 | /// ``` 18 | /// 19 | /// is the sum of `a` and `b` with floating point rounding applied and 20 | /// 21 | /// ```not_rust 22 | /// y = a + b - x 23 | /// ``` 24 | /// 25 | /// is the remainder of the addition. 26 | fn two_sum(a: F, b: F) -> (F, F); 27 | } 28 | 29 | /// Split a floating-point number 30 | pub trait Split: Float { 31 | /// Split factor used in the algorithm 32 | fn split_factor() -> Self; 33 | 34 | /// Split a floating-point number 35 | /// 36 | /// Splits a number `x` into two parts: 37 | /// 38 | /// ```not_rust 39 | /// x = h + t 40 | /// ``` 41 | /// 42 | /// with `h` and `t` nonoverlapping and `t.abs() <= h.abs()` 43 | /// 44 | /// # References 45 | /// 46 | /// Due to Veltkamp, published in [Dekker 71](http://dx.doi.org/10.1007/BF01397083) 47 | #[inline] 48 | fn split(self) -> (Self, Self) { 49 | let x = self; 50 | let c = Self::split_factor() * x; 51 | let h = c - (c - x); 52 | let t = x - h; 53 | (h, t) 54 | } 55 | } 56 | 57 | impl Split for f32 { 58 | #[inline] 59 | fn split_factor() -> Self { 60 | 4097.0 61 | } 62 | } 63 | 64 | impl Split for f64 { 65 | #[inline] 66 | fn split_factor() -> Self { 67 | 134_217_729.0 68 | } 69 | } 70 | 71 | cfg_if! 
{ 72 | if #[cfg(feature = "fma")] { 73 | /// Product transformation (with the `fma` feature only `Float` is required) 74 | pub trait TwoProduct: Float { } 75 | 76 | impl TwoProduct for F where F: Float { } 77 | } else { 78 | /// Product transformation (without the `fma` feature `Split` is required as well) 79 | pub trait TwoProduct: Float + Split { } 80 | 81 | impl TwoProduct for F where F: Float + Split { } 82 | } 83 | } 84 | 85 | /// Half a unit in the last place (ULP) 86 | pub trait HalfUlp { 87 | /// Check whether something has the form of half a ULP 88 | fn has_half_ulp_form(self) -> bool; 89 | 90 | /// Calculate half a ULP of a number 91 | fn half_ulp(self) -> Self; 92 | } 93 | 94 | impl HalfUlp for F 95 | where 96 | F: Float + Ieee754, 97 | F::Significand: Zero + Eq, 98 | { 99 | #[inline] 100 | fn has_half_ulp_form(self) -> bool { 101 | // self is not zero and significand has all zero visible bits 102 | self != F::zero() && self.decompose_raw().2 == F::Significand::zero() 103 | } 104 | 105 | #[inline] 106 | fn half_ulp(self) -> Self { 107 | self.ulp().unwrap_or_else(Self::zero) / F::one().exp2() // exp2(1) == 2, i.e. halve the ULP; zero when `ulp()` is `None` 108 | } 109 | } 110 | 111 | /// Correctly rounded sum of three non-overlapping numbers 112 | pub trait Round3: Float + Ieee754 + HalfUlp {} 113 | 114 | impl Round3 for F where F: Float + Ieee754 + HalfUlp {} 115 | 116 | /// Describes the layout of a floating-point number 117 | pub trait FloatFormat { 118 | /// The number format's base 119 | fn base() -> u32; 120 | 121 | /// The length of the number format's significand field 122 | fn significand_digits() -> u32; 123 | 124 | /// The length of the number format's exponent field 125 | fn exponent_digits() -> u32; 126 | 127 | /// The base raised to the power of the exponent's length 128 | #[inline] 129 | fn base_pow_exponent_digits() -> usize { 130 | Self::base() 131 | .to_usize() 132 | .expect("floating-point base cannot be converted to usize") 133 | .pow(Self::exponent_digits()) 134 | } 135 | 136 | /// The base raised to the power of half the significand's length 137 | #[inline] 138 | fn base_pow_significand_digits_half() -> 
usize { 139 | Self::base() 140 | .to_usize() 141 | .expect("floating-point base cannot be converted to usize") 142 | .pow(Self::significand_digits() / 2) // integer division: an odd length is rounded down 143 | } 144 | } 145 | 146 | impl FloatFormat for f32 { 147 | #[inline] 148 | fn base() -> u32 { 149 | 2 150 | } 151 | 152 | #[inline] 153 | fn significand_digits() -> u32 { 154 | 24 155 | } 156 | 157 | #[inline] 158 | fn exponent_digits() -> u32 { 159 | 8 160 | } 161 | 162 | #[inline] 163 | fn base_pow_exponent_digits() -> usize { 164 | 256 // 2^8, precomputed 165 | } 166 | 167 | #[inline] 168 | fn base_pow_significand_digits_half() -> usize { 169 | 4096 // 2^(24 / 2) = 2^12, precomputed 170 | } 171 | } 172 | 173 | impl FloatFormat for f64 { 174 | #[inline] 175 | fn base() -> u32 { 176 | 2 177 | } 178 | 179 | #[inline] 180 | fn significand_digits() -> u32 { 181 | 53 182 | } 183 | 184 | #[inline] 185 | fn exponent_digits() -> u32 { 186 | 11 187 | } 188 | 189 | #[inline] 190 | fn base_pow_exponent_digits() -> usize { 191 | 2048 // 2^11, precomputed 192 | } 193 | 194 | #[inline] 195 | fn base_pow_significand_digits_half() -> usize { 196 | 67_108_864 // 2^(53 / 2) = 2^26, precomputed 197 | } 198 | } 199 | 200 | /// Extract the raw exponent of a floating-point number 201 | pub trait RawExponent { 202 | /// The raw bits of the exponent 203 | fn raw_exponent(self) -> usize; 204 | } 205 | 206 | impl RawExponent for F 207 | where 208 | F: Ieee754, 209 | F::RawExponent: PrimInt, 210 | { 211 | #[inline] 212 | fn raw_exponent(self) -> usize { 213 | self.decompose_raw() 214 | .1 215 | .to_usize() 216 | .expect("exponent does not fit in a usize.") 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /tests/all.rs: -------------------------------------------------------------------------------- 1 | extern crate accurate; 2 | extern crate rand; 3 | 4 | #[cfg(feature = "parallel")] 5 | extern crate rayon; 6 | 7 | use rand::distributions::Standard; 8 | use rand::prelude::*; 9 | 10 | #[cfg(feature = "parallel")] 11 | use rayon::prelude::*; 12 | 13 | use accurate::sum::OnlineExactSum; 14 | 
use accurate::traits::*; 15 | 16 | fn mk_vec(n: usize) -> Vec 17 | where 18 | Standard: Distribution, 19 | { 20 | let rng = rand::thread_rng(); 21 | rng.sample_iter::(&Standard).take(n).collect() 22 | } 23 | 24 | #[test] 25 | fn oes_add() { 26 | let xs = mk_vec::(100_000); 27 | let ys = mk_vec::(100_000); 28 | 29 | let s = OnlineExactSum::zero() 30 | .absorb(xs.iter().cloned()) 31 | .absorb(ys.iter().cloned()); 32 | 33 | let s1 = OnlineExactSum::zero().absorb(xs.iter().cloned()); 34 | 35 | let s2 = OnlineExactSum::zero().absorb(ys.iter().cloned()); 36 | 37 | assert_eq!(s.sum(), (s1 + s2).sum()); 38 | } 39 | 40 | #[cfg(feature = "parallel")] 41 | #[test] 42 | fn parallel_sum_oes() { 43 | let xs = mk_vec::(100_000); 44 | 45 | let s1 = xs 46 | .par_iter() 47 | .map(|&x| x) 48 | .parallel_sum_with_accumulator::>(); 49 | let s2 = xs 50 | .iter() 51 | .cloned() 52 | .sum_with_accumulator::>(); 53 | 54 | assert_eq!(s1, s2); 55 | } 56 | --------------------------------------------------------------------------------