├── .github └── workflows │ └── rust.yml ├── .gitignore ├── Cargo.toml ├── README.md ├── fastapprox ├── Cargo.toml └── src │ ├── bits.rs │ ├── fast │ └── mod.rs │ ├── faster │ └── mod.rs │ └── lib.rs └── fastapprox_tests ├── Cargo.toml ├── benches └── tests.rs ├── build.rs └── tests ├── c ├── fastapprox.c └── mod.rs └── compare.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ master, main ] 6 | pull_request: 7 | branches: [ master, main ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | tests: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Install Rust 17 | uses: actions-rs/toolchain@v1 18 | with: 19 | profile: minimal 20 | toolchain: stable 21 | override: true 22 | - name: Checkout Sources 23 | uses: actions/checkout@v2 24 | - name: Run tests 25 | run: | 26 | cargo test 27 | - name: Check formatting 28 | uses: actions-rs/cargo@v1 29 | with: 30 | command: fmt 31 | args: -- --check 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target/ 2 | **/*.rs.bk 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | 3 | members = [ 4 | "fastapprox", 5 | "fastapprox_tests", 6 | ] 7 | 8 | [profile.bench] 9 | lto = true 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fastapprox-rs [![Crates.io](https://img.shields.io/crates/v/fastapprox.svg)](https://crates.io/crates/fastapprox) 2 | 3 | Rust version of a [library](https://code.google.com/archive/p/fastapprox/) by Paul Mineiro. 
4 | 5 | Fast approximate versions of certain functions that arise in Machine Learning and DSP. 6 | -------------------------------------------------------------------------------- /fastapprox/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fastapprox" 3 | description = "Fast approximate versions of certain functions that arise in machine learning" 4 | version = "0.3.1" 5 | authors = ["Alexey Suslov "] 6 | license = "MIT" 7 | repository = "https://github.com/loony-bean/fastapprox-rs" 8 | keywords = ["math", "machine-learning", "approximation"] 9 | edition = "2021" 10 | -------------------------------------------------------------------------------- /fastapprox/src/bits.rs: --------------------------------------------------------------------------------
/// Raw transmutation to `u32`.
///
/// Transmutes the given `f32` into its raw memory representation.
/// Similar to `f32::to_bits` but even more raw.
#[inline]
pub fn to_bits(x: f32) -> u32 {
    // SAFETY: `f32` and `u32` have the same size and every bit pattern is a valid `u32`.
    unsafe { ::std::mem::transmute::<f32, u32>(x) }
}

/// Raw transmutation from `u32`.
///
/// Converts the given `u32` containing the float's raw memory representation into the `f32` type.
/// Similar to `f32::from_bits` but even more raw.
#[inline]
pub fn from_bits(x: u32) -> f32 {
    // SAFETY: `u32` and `f32` have the same size and every 32-bit pattern is a valid `f32`
    // (some of them are NaNs).
    unsafe { ::std::mem::transmute::<u32, f32>(x) }
}
-------------------------------------------------------------------------------- /fastapprox/src/fast/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::bits::*; 2 | use crate::faster; 3 | 4 | /// Base 2 logarithm.
5 | #[inline] 6 | pub fn log2(x: f32) -> f32 { 7 | let vx = to_bits(x); 8 | let mx = from_bits((vx & 0x007FFFFF_u32) | 0x3f000000); 9 | let mut y = vx as f32; 10 | y *= 1.1920928955078125e-7_f32; 11 | y - 124.22551499_f32 - 1.498030302_f32 * mx - 1.72587999_f32 / (0.3520887068_f32 + mx) 12 | } 13 | 14 | /// Natural logarithm. 15 | #[inline] 16 | pub fn ln(x: f32) -> f32 { 17 | 0.69314718_f32 * log2(x) 18 | } 19 | 20 | /// Raises 2 to a floating point power. 21 | #[inline] 22 | pub fn pow2(p: f32) -> f32 { 23 | let offset = if p < 0.0 { 1.0_f32 } else { 0.0_f32 }; 24 | let clipp = if p < -126.0 { -126.0_f32 } else { p }; 25 | let w = clipp as i32; 26 | let z = clipp - (w as f32) + offset; 27 | let v = ((1 << 23) as f32 28 | * (clipp + 121.2740575_f32 + 27.7280233_f32 / (4.84252568_f32 - z) - 1.49012907_f32 * z)) 29 | as u32; 30 | from_bits(v) 31 | } 32 | 33 | /// Raises a number to a floating point power. 34 | #[inline] 35 | pub fn pow(x: f32, p: f32) -> f32 { 36 | pow2(p * log2(x)) 37 | } 38 | 39 | /// Exponential function. 40 | #[inline] 41 | pub fn exp(p: f32) -> f32 { 42 | pow2(1.442695040_f32 * p) 43 | } 44 | 45 | /// Sigmoid function. 46 | #[inline] 47 | pub fn sigmoid(x: f32) -> f32 { 48 | 1.0_f32 / (1.0_f32 + exp(-x)) 49 | } 50 | 51 | /// Natural logarithm of the Gamma function. 52 | /// 53 | /// Only works for positive values. 54 | #[inline] 55 | pub fn ln_gamma(x: f32) -> f32 { 56 | let logterm = ln(x * (1.0_f32 + x) * (2.0_f32 + x)); 57 | let xp3 = 3.0_f32 + x; 58 | 59 | -2.081061466_f32 - x + 0.0833333_f32 / xp3 - logterm + (2.5_f32 + x) * ln(xp3) 60 | } 61 | 62 | /// Digamma function. 63 | /// 64 | /// Only works for positive values. 65 | #[inline] 66 | pub fn digamma(x: f32) -> f32 { 67 | let twopx = 2.0_f32 + x; 68 | let logterm = ln(twopx); 69 | 70 | (-48.0_f32 + x * (-157.0_f32 + x * (-127.0_f32 - 30.0_f32 * x))) 71 | / (12.0_f32 * x * (1.0_f32 + x) * twopx * twopx) 72 | + logterm 73 | } 74 | 75 | /// Complementary error function. 
76 | #[inline] 77 | pub fn erfc(x: f32) -> f32 { 78 | const K: f32 = 3.3509633149424609; 79 | const A: f32 = 0.07219054755431126; 80 | const B: f32 = 15.418191568719577; 81 | const C: f32 = 5.609846028328545; 82 | 83 | let mut v = to_bits(C * x); 84 | let xsq = x * x; 85 | let xquad = xsq * xsq; 86 | 87 | v |= 0x80000000; 88 | 89 | 2.0_f32 / (1.0_f32 + pow2(K * x)) - A * x * (B * xquad - 1.0_f32) * faster::pow2(from_bits(v)) 90 | } 91 | 92 | /// Error function. 93 | #[inline] 94 | pub fn erf(x: f32) -> f32 { 95 | 1.0_f32 - erfc(x) 96 | } 97 | 98 | /// Inverse error function. 99 | #[inline] 100 | pub fn erf_inv(x: f32) -> f32 { 101 | const INVK: f32 = 0.30004578719350504; 102 | const A: f32 = 0.020287853348211326; 103 | const B: f32 = 0.07236892874789555; 104 | const C: f32 = 0.9913030456864257; 105 | const D: f32 = 0.8059775923760193; 106 | 107 | let xsq = x * x; 108 | 109 | INVK * log2((1.0_f32 + x) / (1.0_f32 - x)) + x * (A - B * xsq) / (C - D * xsq) 110 | } 111 | 112 | /// Hyperbolic sine function. 113 | #[inline] 114 | pub fn sinh(p: f32) -> f32 { 115 | 0.5_f32 * (exp(p) - exp(-p)) 116 | } 117 | 118 | /// Hyperbolic cosine function. 119 | #[inline] 120 | pub fn cosh(p: f32) -> f32 { 121 | 0.5_f32 * (exp(p) + exp(-p)) 122 | } 123 | 124 | /// Hyperbolic tangent function. 125 | #[inline] 126 | pub fn tanh(p: f32) -> f32 { 127 | -1.0_f32 + 2.0_f32 / (1.0_f32 + exp(-2.0_f32 * p)) 128 | } 129 | 130 | /// Lambert W function. 
131 | #[inline] 132 | pub fn lambertw(x: f32) -> f32 { 133 | const THRESHOLD: f32 = 2.26445; 134 | 135 | let c = if x < THRESHOLD { 136 | 1.546865557_f32 137 | } else { 138 | 1.0_f32 139 | }; 140 | let d = if x < THRESHOLD { 141 | 2.250366841_f32 142 | } else { 143 | 0.0_f32 144 | }; 145 | let a = if x < THRESHOLD { 146 | -0.737769969_f32 147 | } else { 148 | 0.0_f32 149 | }; 150 | 151 | let logterm = ln(c * x + d); 152 | let loglogterm = ln(logterm); 153 | 154 | let minusw = -a - logterm + loglogterm - loglogterm / logterm; 155 | let expminusw = exp(minusw); 156 | let xexpminusw = x * expminusw; 157 | let pexpminusw = xexpminusw - minusw; 158 | 159 | (2.0_f32 * xexpminusw - minusw * (4.0_f32 * xexpminusw - minusw * pexpminusw)) 160 | / (2.0_f32 + pexpminusw * (2.0_f32 - minusw)) 161 | } 162 | 163 | /// Exponent of Lambert W function. 164 | #[inline] 165 | pub fn lambertwexpx(x: f32) -> f32 { 166 | const K: f32 = 1.1765631309; 167 | const A: f32 = 0.94537622168; 168 | 169 | let logarg = x.max(K); 170 | let powarg = if x < K { A * (x - K) } else { 0.0_f32 }; 171 | 172 | let logterm = ln(logarg); 173 | let powterm = faster::pow2(powarg); // don't need accuracy here 174 | 175 | let w = powterm * (logarg - logterm + logterm / logarg); 176 | let logw = ln(w); 177 | let p = x - logw; 178 | 179 | w * (2.0_f32 + p + w * (3.0_f32 + 2.0_f32 * p)) / (2.0_f32 - p + w * (5.0_f32 + 2.0_f32 * w)) 180 | } 181 | 182 | /// Sine of a number in \[-π, π\], in radians. 
183 | #[inline] 184 | pub fn sin(x: f32) -> f32 { 185 | const FOUROVERPI: f32 = 1.2732395447351627; 186 | const FOUROVERPISQ: f32 = 0.40528473456935109; 187 | const Q: f32 = 0.78444488374548933; 188 | 189 | let mut p = to_bits(0.20363937680730309_f32); 190 | let mut r = to_bits(0.015124940802184233_f32); 191 | let mut s = to_bits(-0.0032225901625579573_f32); 192 | 193 | let mut v = to_bits(x); 194 | let sign = v & 0x80000000; 195 | v &= 0x7FFFFFFF; 196 | 197 | let qpprox = FOUROVERPI * x - FOUROVERPISQ * x * from_bits(v); 198 | let qpproxsq = qpprox * qpprox; 199 | 200 | p |= sign; 201 | r |= sign; 202 | s ^= sign; 203 | 204 | Q * qpprox + qpproxsq * (from_bits(p) + qpproxsq * (from_bits(r) + qpproxsq * from_bits(s))) 205 | } 206 | 207 | /// Sine in radians. 208 | /// 209 | /// The range reduction technique used here will be hopelessly inaccurate for |x| >> 1000. 210 | #[inline] 211 | pub fn sinfull(x: f32) -> f32 { 212 | const TWOPI: f32 = 6.2831853071795865; 213 | const INVTWOPI: f32 = 0.15915494309189534; 214 | 215 | let k: u32 = (x * INVTWOPI) as u32; 216 | let half = if x < 0_f32 { -0.5_f32 } else { 0.5_f32 }; 217 | sin((half + (k as f32)) * TWOPI - x) 218 | } 219 | 220 | /// Cosine of a number in \[-π, π\], in radians. 221 | /// 222 | /// # Examples 223 | /// 224 | /// ``` 225 | /// assert_eq!(f32::cos(1.0), 0.5403023); 226 | /// assert_eq!(fastapprox::fast::cos(1.0), 0.5402951); 227 | /// ``` 228 | #[inline] 229 | pub fn cos(x: f32) -> f32 { 230 | const HALFPI: f32 = 1.5707963267948966; 231 | const HALFPIMINUSTWOPI: f32 = -4.7123889803846899; 232 | let offset = if x > HALFPI { HALFPIMINUSTWOPI } else { HALFPI }; 233 | sin(x + offset) 234 | } 235 | 236 | /// Cosine in radians. 237 | /// 238 | /// The range reduction technique used here will be hopelessly inaccurate for |x| >> 1000. 
239 | /// 240 | /// # Examples 241 | /// 242 | /// ``` 243 | /// assert_eq!(f32::cos(10.0), -0.8390715); 244 | /// assert_eq!(fastapprox::fast::cosfull(10.0), -0.83907986); 245 | /// ``` 246 | #[inline] 247 | pub fn cosfull(x: f32) -> f32 { 248 | const HALFPI: f32 = 1.5707963267948966; 249 | sinfull(x + HALFPI) 250 | } 251 | 252 | /// Tangent of a number in \[-π/2, π/2\], in radians. 253 | #[inline] 254 | pub fn tan(x: f32) -> f32 { 255 | const HALFPI: f32 = 1.5707963267948966; 256 | sin(x) / sin(x + HALFPI) 257 | } 258 | 259 | /// Tangent in radians. 260 | /// 261 | /// The range reduction technique used here will be hopelessly inaccurate for |x| >> 1000. 262 | #[inline] 263 | pub fn tanfull(x: f32) -> f32 { 264 | const TWOPI: f32 = 6.2831853071795865; 265 | const INVTWOPI: f32 = 0.15915494309189534; 266 | 267 | let k: u32 = (x * INVTWOPI) as u32; 268 | let half = if x < 0_f32 { -0.5_f32 } else { 0.5_f32 }; 269 | let xnew = x - (half + k as f32) * TWOPI; 270 | 271 | sin(xnew) / cos(xnew) 272 | } 273 | -------------------------------------------------------------------------------- /fastapprox/src/faster/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::bits::*; 2 | 3 | /// Base 2 logarithm. 4 | #[inline] 5 | pub fn log2(x: f32) -> f32 { 6 | let mut y = to_bits(x) as f32; 7 | y *= 1.1920928955078125e-7_f32; 8 | y - 126.94269504_f32 9 | } 10 | 11 | /// Natural logarithm. 12 | #[inline] 13 | pub fn ln(x: f32) -> f32 { 14 | let mut y = to_bits(x) as f32; 15 | y *= 8.2629582881927490e-8_f32; 16 | y - 87.989971088_f32 17 | } 18 | 19 | /// Raises 2 to a floating point power. 20 | #[inline] 21 | pub fn pow2(p: f32) -> f32 { 22 | let clipp = if p < -126.0 { -126.0_f32 } else { p }; 23 | let v = ((1 << 23) as f32 * (clipp + 126.94269504_f32)) as u32; 24 | from_bits(v) 25 | } 26 | 27 | /// Raises a number to a floating point power. 
28 | #[inline] 29 | pub fn pow(x: f32, p: f32) -> f32 { 30 | pow2(p * log2(x)) 31 | } 32 | 33 | /// Exponential function. 34 | #[inline] 35 | pub fn exp(p: f32) -> f32 { 36 | pow2(1.442695040_f32 * p) 37 | } 38 | 39 | /// Sigmoid function. 40 | #[inline] 41 | pub fn sigmoid(x: f32) -> f32 { 42 | 1.0_f32 / (1.0_f32 + exp(-x)) 43 | } 44 | 45 | /// Natural logarithm of the Gamma function. 46 | /// 47 | /// Only works for positive values. 48 | #[inline] 49 | pub fn ln_gamma(x: f32) -> f32 { 50 | -0.0810614667_f32 - x - ln(x) + (0.5_f32 + x) * ln(1.0_f32 + x) 51 | } 52 | 53 | /// Digamma function. 54 | /// 55 | /// Only works for positive values. 56 | #[inline] 57 | pub fn digamma(x: f32) -> f32 { 58 | let onepx = 1.0_f32 + x; 59 | -1.0_f32 / x - 1.0_f32 / (2.0_f32 * onepx) + ln(onepx) 60 | } 61 | 62 | /// Complementary error function. 63 | #[inline] 64 | pub fn erfc(x: f32) -> f32 { 65 | const K: f32 = 3.3509633149424609; 66 | 67 | 2.0_f32 / (1.0_f32 + pow2(K * x)) 68 | } 69 | 70 | /// Error function. 71 | #[inline] 72 | pub fn erf(x: f32) -> f32 { 73 | 1.0_f32 - erfc(x) 74 | } 75 | 76 | /// Inverse error function. 77 | #[inline] 78 | pub fn erf_inv(x: f32) -> f32 { 79 | const INVK: f32 = 0.30004578719350504; 80 | 81 | INVK * log2((1.0_f32 + x) / (1.0_f32 - x)) 82 | } 83 | 84 | /// Hyperbolic sine function. 85 | #[inline] 86 | pub fn sinh(p: f32) -> f32 { 87 | 0.5_f32 * (exp(p) - exp(-p)) 88 | } 89 | 90 | /// Hyperbolic cosine function. 91 | #[inline] 92 | pub fn cosh(p: f32) -> f32 { 93 | 0.5_f32 * (exp(p) + exp(-p)) 94 | } 95 | 96 | /// Hyperbolic tangent function. 97 | #[inline] 98 | pub fn tanh(p: f32) -> f32 { 99 | -1.0_f32 + 2.0_f32 / (1.0_f32 + exp(-2.0_f32 * p)) 100 | } 101 | 102 | /// Lambert W function. 
103 | #[inline] 104 | pub fn lambertw(x: f32) -> f32 { 105 | const THRESHOLD: f32 = 2.26445; 106 | 107 | let c = if x < THRESHOLD { 108 | 1.546865557_f32 109 | } else { 110 | 1.0_f32 111 | }; 112 | let d = if x < THRESHOLD { 113 | 2.250366841_f32 114 | } else { 115 | 0.0_f32 116 | }; 117 | let a = if x < THRESHOLD { 118 | -0.737769969_f32 119 | } else { 120 | 0.0_f32 121 | }; 122 | 123 | let logterm = ln(c * x + d); 124 | let loglogterm = ln(logterm); 125 | 126 | let w = a + logterm - loglogterm + loglogterm / logterm; 127 | let expw = exp(-w); 128 | 129 | (w * w + expw * x) / (1.0_f32 + w) 130 | } 131 | 132 | /// Exponent of Lambert W function. 133 | #[inline] 134 | pub fn lambertwexpx(x: f32) -> f32 { 135 | const K: f32 = 1.1765631309; 136 | const A: f32 = 0.94537622168; 137 | 138 | let logarg = x.max(K); 139 | let powarg = if x < K { A * (x - K) } else { 0.0_f32 }; 140 | 141 | let logterm = ln(logarg); 142 | let powterm = pow2(powarg); 143 | 144 | let w = powterm * (logarg - logterm + logterm / logarg); 145 | let logw = ln(w); 146 | 147 | w * (1.0_f32 + x - logw) / (1.0_f32 + w) 148 | } 149 | 150 | /// Sine of a number in \[-π, π\], in radians. 151 | #[inline] 152 | pub fn sin(x: f32) -> f32 { 153 | const FOUROVERPI: f32 = 1.2732395447351627; 154 | const FOUROVERPISQ: f32 = 0.40528473456935109; 155 | const Q: f32 = 0.77633023248007499; 156 | 157 | let mut p = to_bits(0.22308510060189463_f32); 158 | let mut v = to_bits(x); 159 | 160 | let sign: u32 = v & 0x80000000; 161 | v &= 0x7FFFFFFF; 162 | 163 | let qpprox = FOUROVERPI * x - FOUROVERPISQ * x * from_bits(v); 164 | 165 | p |= sign; 166 | 167 | qpprox * (Q + from_bits(p) * qpprox) 168 | } 169 | 170 | /// Sine in radians. 171 | /// 172 | /// The range reduction technique used here will be hopelessly inaccurate for |x| >> 1000. 
173 | #[inline] 174 | pub fn sinfull(x: f32) -> f32 { 175 | const TWOPI: f32 = 6.2831853071795865; 176 | const INVTWOPI: f32 = 0.15915494309189534; 177 | 178 | let k: i32 = (x * INVTWOPI) as i32; 179 | let half = if x < 0.0_f32 { -0.5_f32 } else { 0.5_f32 }; 180 | sin((half + (k as f32)) * TWOPI - x) 181 | } 182 | 183 | /// Cosine of a number in \[-π, π\], in radians. 184 | /// 185 | /// # Examples 186 | /// 187 | /// ``` 188 | /// assert_eq!(f32::cos(1.0), 0.5403023); 189 | /// assert_eq!(fastapprox::faster::cos(1.0), 0.5357177); 190 | /// ``` 191 | #[inline] 192 | pub fn cos(x: f32) -> f32 { 193 | const TWOOVERPI: f32 = 0.63661977236758134; 194 | const P: f32 = 0.54641335845679634; 195 | 196 | let v = to_bits(x) & 0x7FFFFFFF; 197 | 198 | let qpprox = 1.0_f32 - TWOOVERPI * from_bits(v); 199 | 200 | qpprox + P * qpprox * (1.0_f32 - qpprox * qpprox) 201 | } 202 | 203 | /// Cosine in radians. 204 | /// 205 | /// The range reduction technique used here will be hopelessly inaccurate for |x| >> 1000. 206 | /// 207 | /// # Examples 208 | /// 209 | /// ``` 210 | /// assert_eq!(f32::cos(10.0), -0.8390715); 211 | /// assert_eq!(fastapprox::faster::cosfull(10.0), -0.8394889); 212 | /// ``` 213 | #[inline] 214 | pub fn cosfull(x: f32) -> f32 { 215 | const HALFPI: f32 = 1.5707963267948966; 216 | sinfull(x + HALFPI) 217 | } 218 | 219 | /// Tangent of a number in \[-π/2, π/2\], in radians. 220 | #[inline] 221 | pub fn tan(x: f32) -> f32 { 222 | sin(x) / cos(x) 223 | } 224 | 225 | /// Tangent in radians. 226 | /// 227 | /// The range reduction technique used here will be hopelessly inaccurate for |x| >> 1000. 
228 | #[inline] 229 | pub fn tanfull(x: f32) -> f32 { 230 | const TWOPI: f32 = 6.2831853071795865; 231 | const INVTWOPI: f32 = 0.15915494309189534; 232 | 233 | let k: i32 = (x * INVTWOPI) as i32; 234 | let half = if x < 0.0_f32 { -0.5_f32 } else { 0.5_f32 }; 235 | let xnew = x - (half + (k as f32)) * TWOPI; 236 | 237 | sin(xnew) / cos(xnew) 238 | } 239 | -------------------------------------------------------------------------------- /fastapprox/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![deny(missing_docs)] 2 | 3 | //! # fastapprox 4 | //! 5 | //! Rust version of a [library](https://code.google.com/archive/p/fastapprox/) by Paul Mineiro. 6 | //! 7 | //! Fast approximate versions of certain functions that arise in machine learning. 8 | //! 9 | //! E.g. in [Vowpal Wabbit](https://github.com/JohnLangford/vowpal_wabbit) this is one of the many clever tricks used to reach it's incredible training speed. 10 | //! 11 | //! Module names `fast` and `faster` come from the original work, and represent sets of the same algorithms with different speed-accuracy levels. 12 | //! 13 | //! Although approximate functions could give you some speedup (but not necessarily!), some warnings should be provided: 14 | //! 15 | //! - Run your own benchmarks 16 | //! - Make sure math is a bottleneck in your algorithm 17 | //! - Pay attention to convergence 18 | //! - SIMD versions are not implemented (yet, until `simd` crate will graduate from the nursery) 19 | //! 20 | //! ## Benchmarks 21 | //! Running `cargo bench` on MacBook Pro (Late 2013), 2.6 GHz Intel Core i7, gives the following output: 22 | //! 23 | //! ```text 24 | //! test cos_fast ... bench: 3,674 ns/iter (+/- 778) 25 | //! test cos_faster ... bench: 1,559 ns/iter (+/- 206) 26 | //! test cos_std ... bench: 7,329 ns/iter (+/- 104) 27 | //! test cosfull_fast ... bench: 6,880 ns/iter (+/- 1,021) 28 | //! test cosfull_faster ... bench: 4,238 ns/iter (+/- 183) 29 | //! 
test cosh_fast ... bench: 8,270 ns/iter (+/- 4,190) 30 | //! test cosh_faster ... bench: 2,451 ns/iter (+/- 175) 31 | //! test cosh_std ... bench: 4,407 ns/iter (+/- 798) 32 | //! test digamma_fast ... bench: 4,644 ns/iter (+/- 1,126) 33 | //! test digamma_faster ... bench: 3,770 ns/iter (+/- 417) 34 | //! test digamma_special ... bench: 16,260 ns/iter (+/- 1,374) 35 | //! test digamma_statrs ... bench: 14,401 ns/iter (+/- 3,198) 36 | //! test erf_fast ... bench: 54,401 ns/iter (+/- 20,448) 37 | //! test erf_faster ... bench: 2,359 ns/iter (+/- 426) 38 | //! test erf_inv_fast ... bench: 5,958 ns/iter (+/- 2,794) 39 | //! test erf_inv_faster ... bench: 2,113 ns/iter (+/- 153) 40 | //! test erf_inv_statrs ... bench: 769 ns/iter (+/- 83) 41 | //! test erf_special ... bench: 4,948 ns/iter (+/- 1,443) 42 | //! test erf_statrs ... bench: 6,287 ns/iter (+/- 246) 43 | //! test erfc_fast ... bench: 54,201 ns/iter (+/- 6,307) 44 | //! test erfc_faster ... bench: 2,052 ns/iter (+/- 112) 45 | //! test erfc_special ... bench: 4,887 ns/iter (+/- 362) 46 | //! test exp_fast ... bench: 3,774 ns/iter (+/- 235) 47 | //! test exp_faster ... bench: 1,400 ns/iter (+/- 260) 48 | //! test exp_std ... bench: 2,760 ns/iter (+/- 202) 49 | //! test lambertw_fast ... bench: 21,040 ns/iter (+/- 2,523) 50 | //! test lambertw_faster ... bench: 24,840 ns/iter (+/- 8,939) 51 | //! test lambertwexpx_fast ... bench: 13,240 ns/iter (+/- 858) 52 | //! test lambertwexpx_faster ... bench: 8,857 ns/iter (+/- 1,439) 53 | //! test ln_fast ... bench: 2,087 ns/iter (+/- 528) 54 | //! test ln_faster ... bench: 1,214 ns/iter (+/- 447) 55 | //! test ln_gamma_fast ... bench: 6,193 ns/iter (+/- 901) 56 | //! test ln_gamma_faster ... bench: 2,952 ns/iter (+/- 211) 57 | //! test ln_gamma_special ... bench: 36,726 ns/iter (+/- 846) 58 | //! test ln_gamma_statrs ... bench: 56,295 ns/iter (+/- 973) 59 | //! test ln_std ... bench: 5,175 ns/iter (+/- 321) 60 | //! test log2_fast ... 
bench: 1,998 ns/iter (+/- 335) 61 | //! test log2_faster ... bench: 1,206 ns/iter (+/- 188) 62 | //! test log2_std ... bench: 5,087 ns/iter (+/- 1,736) 63 | //! test pow2_fast ... bench: 3,472 ns/iter (+/- 192) 64 | //! test pow2_faster ... bench: 1,069 ns/iter (+/- 356) 65 | //! test pow2_std ... bench: 2,942 ns/iter (+/- 172) 66 | //! test pow_fast ... bench: 6,769 ns/iter (+/- 1,460) 67 | //! test pow_faster ... bench: 2,365 ns/iter (+/- 293) 68 | //! test pow_std ... bench: 13,147 ns/iter (+/- 1,000) 69 | //! test sigmoid_fast ... bench: 4,818 ns/iter (+/- 428) 70 | //! test sigmoid_faster ... bench: 1,869 ns/iter (+/- 56) 71 | //! test sigmoid_std ... bench: 3,312 ns/iter (+/- 131) 72 | //! test sin_fast ... bench: 3,038 ns/iter (+/- 685) 73 | //! test sin_faster ... bench: 1,875 ns/iter (+/- 481) 74 | //! test sin_std ... bench: 7,215 ns/iter (+/- 39) 75 | //! test sinfull_fast ... bench: 6,483 ns/iter (+/- 405) 76 | //! test sinfull_faster ... bench: 3,973 ns/iter (+/- 874) 77 | //! test sinh_fast ... bench: 8,394 ns/iter (+/- 618) 78 | //! test sinh_faster ... bench: 2,588 ns/iter (+/- 286) 79 | //! test sinh_std ... bench: 4,434 ns/iter (+/- 329) 80 | //! test tan_fast ... bench: 5,165 ns/iter (+/- 174) 81 | //! test tan_faster ... bench: 3,423 ns/iter (+/- 1,083) 82 | //! test tan_std ... bench: 7,695 ns/iter (+/- 453) 83 | //! test tanfull_fast ... bench: 10,299 ns/iter (+/- 1,761) 84 | //! test tanfull_faster ... bench: 6,214 ns/iter (+/- 524) 85 | //! test tanh_fast ... bench: 5,598 ns/iter (+/- 709) 86 | //! test tanh_faster ... bench: 1,921 ns/iter (+/- 445) 87 | //! test tanh_std ... bench: 4,056 ns/iter (+/- 306) 88 | //! ``` 89 | 90 | /// Fast approximations with small error. 91 | pub mod fast; 92 | 93 | /// Faster approximations with considerable error. 94 | pub mod faster; 95 | 96 | /// Raw bits manipulations (public for pedagogical reasons). 
97 | pub mod bits; -------------------------------------------------------------------------------- /fastapprox_tests/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "fastapprox_tests" 3 | description = "Test suite for fastapprox crate" 4 | version = "0.3.1" 5 | authors = ["Alexey Suslov "] 6 | license = "MIT" 7 | repository = "https://github.com/loony-bean/fastapprox-rs" 8 | keywords = ["math", "machine-learning", "approximation"] 9 | edition = "2021" 10 | 11 | [dependencies] 12 | fastapprox = { path = "../fastapprox" } 13 | 14 | [[bench]] 15 | name = "tests" 16 | harness = false 17 | 18 | [build-dependencies] 19 | cc = "1" 20 | 21 | [dev-dependencies] 22 | bencher = "0.1.5" 23 | special = "0.10" 24 | statrs = "0.16" 25 | -------------------------------------------------------------------------------- /fastapprox_tests/benches/tests.rs: --------------------------------------------------------------------------------
#[macro_use]
extern crate bencher;
extern crate fastapprox;
extern crate special;
extern crate statrs;

use bencher::Bencher;
use fastapprox::{fast, faster};
use statrs::function::{erf, gamma};

/// Number of inputs (`0.0, 1.0, …`) evaluated per benchmark iteration.
const ITERATIONS: u32 = 1000;

/// Benchmarks `cb` by summing its results over the inputs `0..ITERATIONS`.
fn run<F: Fn(f32) -> f32>(bench: &mut Bencher, cb: F) {
    bench.iter(|| (0..ITERATIONS).fold(0.0, |a, b| a + cb(b as f32)))
}

/// Declares one `fn name(bench: &mut Bencher)` per entry, each passing its function
/// or closure to `run`.
macro_rules! benches {
    ($($name:ident => $f:expr),+ $(,)?) => {
        $(
            fn $name(bench: &mut Bencher) {
                run(bench, $f)
            }
        )+
    };
}

benches! {
    log2_std => |b| b.log2(),
    log2_fast => fast::log2,
    log2_faster => faster::log2,

    ln_std => |b| b.ln(),
    ln_fast => fast::ln,
    ln_faster => faster::ln,

    exp_std => |b| b.exp(),
    exp_fast => fast::exp,
    exp_faster => faster::exp,

    pow2_std => |b| 2.0_f32.powf(b),
    pow2_fast => fast::pow2,
    pow2_faster => faster::pow2,

    pow_std => |b| b.powf(1.5),
    pow_fast => |b| fast::pow(b, 1.5),
    pow_faster => |b| faster::pow(b, 1.5),

    sigmoid_std => |b| 1.0_f32 / (1.0_f32 + (-b).exp()),
    sigmoid_fast => fast::sigmoid,
    sigmoid_faster => faster::sigmoid,

    ln_gamma_special => |b| special::Gamma::ln_gamma(b as f64).0 as f32,
    ln_gamma_statrs => |b| gamma::ln_gamma(b as f64) as f32,
    ln_gamma_fast => fast::ln_gamma,
    ln_gamma_faster => faster::ln_gamma,

    digamma_special => |b| special::Gamma::digamma(b as f64) as f32,
    digamma_statrs => |b| gamma::digamma(b as f64) as f32,
    digamma_fast => fast::digamma,
    digamma_faster => faster::digamma,

    erfc_special => |b| special::Error::compl_error(b as f64) as f32,
    erfc_fast => fast::erfc,
    erfc_faster => faster::erfc,

    erf_statrs => |b| erf::erf(b as f64) as f32,
    erf_special => |b| special::Error::error(b as f64) as f32,
    erf_fast => fast::erf,
    erf_faster => faster::erf,

    erf_inv_statrs => |b| erf::erf_inv(b as f64) as f32,
    erf_inv_fast => fast::erf_inv,
    erf_inv_faster => faster::erf_inv,

    sinh_std => |b| b.sinh(),
    sinh_fast => fast::sinh,
    sinh_faster => faster::sinh,

    cosh_std => |b| b.cosh(),
    cosh_fast => fast::cosh,
    cosh_faster => faster::cosh,

    tanh_std => |b| b.tanh(),
    tanh_fast => fast::tanh,
    tanh_faster => faster::tanh,

    lambertw_fast => fast::lambertw,
    lambertw_faster => faster::lambertw,

    lambertwexpx_fast => fast::lambertwexpx,
    lambertwexpx_faster => faster::lambertwexpx,

    sin_std => |b| b.sin(),
    sin_fast => fast::sin,
    sinfull_fast => fast::sinfull,
    sin_faster => faster::sin,
    sinfull_faster => faster::sinfull,

    cos_std => |b| b.cos(),
    cos_fast => fast::cos,
    cosfull_fast => fast::cosfull,
    cos_faster => faster::cos,
    cosfull_faster => faster::cosfull,

    tan_std => |b| b.tan(),
    tan_fast => fast::tan,
    tanfull_fast => fast::tanfull,
    tan_faster => faster::tan,
    tanfull_faster => faster::tanfull,
}

benchmark_group!(
    benches,
    log2_std,
    log2_fast,
    log2_faster,
    ln_std,
    ln_fast,
    ln_faster,
    exp_std,
    exp_fast,
    exp_faster,
    pow2_std,
    pow2_fast,
    pow2_faster,
    pow_std,
    pow_fast,
    pow_faster,
    sigmoid_std,
    sigmoid_fast,
    sigmoid_faster,
    ln_gamma_special,
    ln_gamma_statrs,
    ln_gamma_fast,
    ln_gamma_faster,
    digamma_special,
    digamma_statrs,
    digamma_fast,
    digamma_faster,
    erf_statrs,
    erf_special,
    erf_fast,
    erf_faster,
    erfc_special,
    erfc_fast,
    erfc_faster,
    erf_inv_statrs,
    erf_inv_fast,
    erf_inv_faster,
    sinh_std,
    sinh_fast,
    sinh_faster,
    cosh_std,
    cosh_fast,
    cosh_faster,
    tanh_std,
    tanh_fast,
    tanh_faster,
    lambertw_fast,
    lambertw_faster,
    lambertwexpx_fast,
    lambertwexpx_faster,
    sin_std,
    sin_fast,
    sinfull_fast,
    sin_faster,
    sinfull_faster,
    cos_std,
    cos_fast,
    cosfull_fast,
    cos_faster,
    cosfull_faster,
    tan_std,
    tan_fast,
    tanfull_fast,
    tan_faster,
    tanfull_faster
);
benchmark_main!(benches);
-------------------------------------------------------------------------------- /fastapprox_tests/build.rs: -------------------------------------------------------------------------------- 1 | extern crate cc; 2 | 3 | fn main() { 4 | cc::Build::new() 5 | .file("tests/c/fastapprox.c") 6 | .compile("fastapprox"); 7 | } 8 | -------------------------------------------------------------------------------- /fastapprox_tests/tests/c/fastapprox.c: -------------------------------------------------------------------------------- 1 | /*=====================================================================* 2 | * Copyright (C) 2012 Paul Mineiro * 3 | * All rights reserved. * 4 | * * 5 | * Redistribution and use in source and binary forms, with * 6 | * or without modification, are permitted provided that the * 7 | * following conditions are met: * 8 | * * 9 | * * Redistributions of source code must retain the * 10 | * above copyright notice, this list of conditions and * 11 | * the following disclaimer. * 12 | * * 13 | * * Redistributions in binary form must reproduce the * 14 | * above copyright notice, this list of conditions and * 15 | * the following disclaimer in the documentation and/or * 16 | * other materials provided with the distribution.
* 17 | * * 18 | * * Neither the name of Paul Mineiro nor the names * 19 | * of other contributors may be used to endorse or promote * 20 | * products derived from this software without specific * 21 | * prior written permission. * 22 | * * 23 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND * 24 | * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, * 25 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * 26 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * 27 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER * 28 | * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * 29 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * 30 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE * 31 | * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * 32 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * 33 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * 34 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * 35 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * 36 | * POSSIBILITY OF SUCH DAMAGE. * 37 | * * 38 | * Contact: Paul Mineiro * 39 | *=====================================================================*/ 40 | 41 | #include 42 | #include 43 | #include 44 | 45 | // Underflow of exponential is common practice in numerical routines, 46 | // so handle it here. 47 | 48 | float 49 | fastpow2 (float p) 50 | { 51 | float offset = (p < 0) ? 1.0f : 0.0f; 52 | float clipp = (p < -126) ? -126.0f : p; 53 | int w = clipp; 54 | float z = clipp - w + offset; 55 | union { uint32_t i; float f; } v = { (uint32_t) ( (1 << 23) * (clipp + 121.2740575f + 27.7280233f / (4.84252568f - z) - 1.49012907f * z) ) }; 56 | 57 | return v.f; 58 | } 59 | 60 | float 61 | fastexp (float p) 62 | { 63 | return fastpow2 (1.442695040f * p); 64 | } 65 | 66 | float 67 | fasterpow2 (float p) 68 | { 69 | float clipp = (p < -126) ? 
-126.0f : p; 70 | union { uint32_t i; float f; } v = { (uint32_t) ( (1 << 23) * (clipp + 126.94269504f) ) }; 71 | return v.f; 72 | } 73 | 74 | float 75 | fasterexp (float p) 76 | { 77 | return fasterpow2 (1.442695040f * p); 78 | } 79 | 80 | float 81 | fastlog2 (float x) 82 | { 83 | union { float f; uint32_t i; } vx = { x }; 84 | union { uint32_t i; float f; } mx = { (vx.i & 0x007FFFFF) | 0x3f000000 }; 85 | float y = vx.i; 86 | y *= 1.1920928955078125e-7f; 87 | 88 | return y - 124.22551499f 89 | - 1.498030302f * mx.f 90 | - 1.72587999f / (0.3520887068f + mx.f); 91 | } 92 | 93 | float 94 | fastlog (float x) 95 | { 96 | return 0.69314718f * fastlog2 (x); 97 | } 98 | 99 | float 100 | fasterlog2 (float x) 101 | { 102 | union { float f; uint32_t i; } vx = { x }; 103 | float y = vx.i; 104 | y *= 1.1920928955078125e-7f; 105 | return y - 126.94269504f; 106 | } 107 | 108 | float 109 | fasterlog (float x) 110 | { 111 | union { float f; uint32_t i; } vx = { x }; 112 | float y = vx.i; 113 | y *= 8.2629582881927490e-8f; 114 | return y - 87.989971088f; 115 | } 116 | 117 | float 118 | fastpow (float x, 119 | float p) 120 | { 121 | return fastpow2 (p * fastlog2 (x)); 122 | } 123 | 124 | float 125 | fasterpow (float x, 126 | float p) 127 | { 128 | return fasterpow2 (p * fasterlog2 (x)); 129 | } 130 | 131 | float 132 | fastsigmoid (float x) 133 | { 134 | return 1.0f / (1.0f + fastexp (-x)); 135 | } 136 | 137 | float 138 | fastersigmoid (float x) 139 | { 140 | return 1.0f / (1.0f + fasterexp (-x)); 141 | } 142 | 143 | float 144 | fastlgamma (float x) 145 | { 146 | float logterm = fastlog (x * (1.0f + x) * (2.0f + x)); 147 | float xp3 = 3.0f + x; 148 | 149 | return - 2.081061466f 150 | - x 151 | + 0.0833333f / xp3 152 | - logterm 153 | + (2.5f + x) * fastlog (xp3); 154 | } 155 | 156 | float 157 | fasterlgamma (float x) 158 | { 159 | return - 0.0810614667f 160 | - x 161 | - fasterlog (x) 162 | + (0.5f + x) * fasterlog (1.0f + x); 163 | } 164 | 165 | float 166 | fastdigamma (float x) 167 
| { 168 | float twopx = 2.0f + x; 169 | float logterm = fastlog (twopx); 170 | 171 | return (-48.0f + x * (-157.0f + x * (-127.0f - 30.0f * x))) / 172 | (12.0f * x * (1.0f + x) * twopx * twopx) 173 | + logterm; 174 | } 175 | 176 | float 177 | fasterdigamma (float x) 178 | { 179 | float onepx = 1.0f + x; 180 | 181 | return -1.0f / x - 1.0f / (2 * onepx) + fasterlog (onepx); 182 | } 183 | 184 | float 185 | fasterfc (float x) 186 | { 187 | static const float k = 3.3509633149424609f; 188 | static const float a = 0.07219054755431126f; 189 | static const float b = 15.418191568719577f; 190 | static const float c = 5.609846028328545f; 191 | 192 | union { float f; uint32_t i; } vc = { c * x }; 193 | float xsq = x * x; 194 | float xquad = xsq * xsq; 195 | 196 | vc.i |= 0x80000000; 197 | 198 | return 2.0f / (1.0f + fastpow2 (k * x)) - a * x * (b * xquad - 1.0f) * fasterpow2 (vc.f); 199 | } 200 | 201 | float 202 | fastererfc (float x) 203 | { 204 | static const float k = 3.3509633149424609f; 205 | 206 | return 2.0f / (1.0f + fasterpow2 (k * x)); 207 | } 208 | 209 | float 210 | fasterf (float x) 211 | { 212 | return 1.0f - fasterfc (x); 213 | } 214 | 215 | float 216 | fastererf (float x) 217 | { 218 | return 1.0f - fastererfc (x); 219 | } 220 | 221 | float 222 | fastinverseerf (float x) 223 | { 224 | static const float invk = 0.30004578719350504f; 225 | static const float a = 0.020287853348211326f; 226 | static const float b = 0.07236892874789555f; 227 | static const float c = 0.9913030456864257f; 228 | static const float d = 0.8059775923760193f; 229 | 230 | float xsq = x * x; 231 | 232 | return invk * fastlog2 ((1.0f + x) / (1.0f - x)) 233 | + x * (a - b * xsq) / (c - d * xsq); 234 | } 235 | 236 | float 237 | fasterinverseerf (float x) 238 | { 239 | static const float invk = 0.30004578719350504f; 240 | 241 | return invk * fasterlog2 ((1.0f + x) / (1.0f - x)); 242 | } 243 | 244 | float 245 | fastsinh (float p) 246 | { 247 | return 0.5f * (fastexp (p) - fastexp (-p)); 248 | } 
249 | 250 | float 251 | fastersinh (float p) 252 | { 253 | return 0.5f * (fasterexp (p) - fasterexp (-p)); 254 | } 255 | 256 | float 257 | fastcosh (float p) 258 | { 259 | return 0.5f * (fastexp (p) + fastexp (-p)); 260 | } 261 | 262 | float 263 | fastercosh (float p) 264 | { 265 | return 0.5f * (fasterexp (p) + fasterexp (-p)); 266 | } 267 | 268 | float 269 | fasttanh (float p) 270 | { 271 | return -1.0f + 2.0f / (1.0f + fastexp (-2.0f * p)); 272 | } 273 | 274 | float 275 | fastertanh (float p) 276 | { 277 | return -1.0f + 2.0f / (1.0f + fasterexp (-2.0f * p)); 278 | } 279 | 280 | // these functions compute the upper branch aka W_0 281 | 282 | float 283 | fastlambertw (float x) 284 | { 285 | static const float threshold = 2.26445f; 286 | 287 | float c = (x < threshold) ? 1.546865557f : 1.0f; 288 | float d = (x < threshold) ? 2.250366841f : 0.0f; 289 | float a = (x < threshold) ? -0.737769969f : 0.0f; 290 | 291 | float logterm = fastlog (c * x + d); 292 | float loglogterm = fastlog (logterm); 293 | 294 | float minusw = -a - logterm + loglogterm - loglogterm / logterm; 295 | float expminusw = fastexp (minusw); 296 | float xexpminusw = x * expminusw; 297 | float pexpminusw = xexpminusw - minusw; 298 | 299 | return (2.0f * xexpminusw - minusw * (4.0f * xexpminusw - minusw * pexpminusw)) / 300 | (2.0f + pexpminusw * (2.0f - minusw)); 301 | } 302 | 303 | float 304 | fasterlambertw (float x) 305 | { 306 | static const float threshold = 2.26445f; 307 | 308 | float c = (x < threshold) ? 1.546865557f : 1.0f; 309 | float d = (x < threshold) ? 2.250366841f : 0.0f; 310 | float a = (x < threshold) ? 
-0.737769969f : 0.0f; 311 | 312 | float logterm = fasterlog (c * x + d); 313 | float loglogterm = fasterlog (logterm); 314 | 315 | float w = a + logterm - loglogterm + loglogterm / logterm; 316 | float expw = fasterexp (-w); 317 | 318 | return (w * w + expw * x) / (1.0f + w); 319 | } 320 | 321 | float 322 | fastlambertwexpx (float x) 323 | { 324 | static const float k = 1.1765631309f; 325 | static const float a = 0.94537622168f; 326 | 327 | float logarg = fmaxf (x, k); 328 | float powarg = (x < k) ? a * (x - k) : 0; 329 | 330 | float logterm = fastlog (logarg); 331 | float powterm = fasterpow2 (powarg); // don't need accuracy here 332 | 333 | float w = powterm * (logarg - logterm + logterm / logarg); 334 | float logw = fastlog (w); 335 | float p = x - logw; 336 | 337 | return w * (2.0f + p + w * (3.0f + 2.0f * p)) / 338 | (2.0f - p + w * (5.0f + 2.0f * w)); 339 | } 340 | 341 | float 342 | fasterlambertwexpx (float x) 343 | { 344 | static const float k = 1.1765631309f; 345 | static const float a = 0.94537622168f; 346 | 347 | float logarg = fmaxf (x, k); 348 | float powarg = (x < k) ? a * (x - k) : 0; 349 | 350 | float logterm = fasterlog (logarg); 351 | float powterm = fasterpow2 (powarg); 352 | 353 | float w = powterm * (logarg - logterm + logterm / logarg); 354 | float logw = fasterlog (w); 355 | 356 | return w * (1.0f + x - logw) / (1.0f + w); 357 | } 358 | 359 | // http://www.devmaster.net/forums/showthread.php?t=5784 360 | // fast sine variants are for x \in [ -\pi, \pi ] 361 | // fast cosine variants are for x \in [ -\pi, \pi ] 362 | // fast tangent variants are for x \in [ -\pi / 2, \pi / 2 ] 363 | // "full" versions of functions handle the entire range of inputs 364 | // although the range reduction technique used here will be hopelessly 365 | // inaccurate for |x| >> 1000 366 | // 367 | // WARNING: fastsinfull, fastcosfull, and fasttanfull can be slower than 368 | // libc calls on older machines (!) and on newer machines are only 369 | // slightly faster. 
however: 370 | // * vectorized versions are competitive 371 | // * faster full versions are competitive 372 | 373 | float 374 | fastsin (float x) 375 | { 376 | static const float fouroverpi = 1.2732395447351627f; 377 | static const float fouroverpisq = 0.40528473456935109f; 378 | static const float q = 0.78444488374548933f; 379 | union { float f; uint32_t i; } p = { 0.20363937680730309f }; 380 | union { float f; uint32_t i; } r = { 0.015124940802184233f }; 381 | union { float f; uint32_t i; } s = { -0.0032225901625579573f }; 382 | 383 | union { float f; uint32_t i; } vx = { x }; 384 | uint32_t sign = vx.i & 0x80000000; 385 | vx.i = vx.i & 0x7FFFFFFF; 386 | 387 | float qpprox = fouroverpi * x - fouroverpisq * x * vx.f; 388 | float qpproxsq = qpprox * qpprox; 389 | 390 | p.i |= sign; 391 | r.i |= sign; 392 | s.i ^= sign; 393 | 394 | return q * qpprox + qpproxsq * (p.f + qpproxsq * (r.f + qpproxsq * s.f)); 395 | } 396 | 397 | float 398 | fastersin (float x) 399 | { 400 | static const float fouroverpi = 1.2732395447351627f; 401 | static const float fouroverpisq = 0.40528473456935109f; 402 | static const float q = 0.77633023248007499f; 403 | union { float f; uint32_t i; } p = { 0.22308510060189463f }; 404 | 405 | union { float f; uint32_t i; } vx = { x }; 406 | uint32_t sign = vx.i & 0x80000000; 407 | vx.i &= 0x7FFFFFFF; 408 | 409 | float qpprox = fouroverpi * x - fouroverpisq * x * vx.f; 410 | 411 | p.i |= sign; 412 | 413 | return qpprox * (q + p.f * qpprox); 414 | } 415 | 416 | float 417 | fastsinfull (float x) 418 | { 419 | static const float twopi = 6.2831853071795865f; 420 | static const float invtwopi = 0.15915494309189534f; 421 | 422 | int k = x * invtwopi; 423 | float half = (x < 0) ? 
-0.5f : 0.5f; 424 | return fastsin ((half + k) * twopi - x); 425 | } 426 | 427 | float 428 | fastersinfull (float x) 429 | { 430 | static const float twopi = 6.2831853071795865f; 431 | static const float invtwopi = 0.15915494309189534f; 432 | 433 | int k = x * invtwopi; 434 | float half = (x < 0) ? -0.5f : 0.5f; 435 | return fastersin ((half + k) * twopi - x); 436 | } 437 | 438 | float 439 | fastcos (float x) 440 | { 441 | static const float halfpi = 1.5707963267948966f; 442 | static const float halfpiminustwopi = -4.7123889803846899f; 443 | float offset = (x > halfpi) ? halfpiminustwopi : halfpi; 444 | return fastsin (x + offset); 445 | } 446 | 447 | float 448 | fastercos (float x) 449 | { 450 | static const float twooverpi = 0.63661977236758134f; 451 | static const float p = 0.54641335845679634f; 452 | 453 | union { float f; uint32_t i; } vx = { x }; 454 | vx.i &= 0x7FFFFFFF; 455 | 456 | float qpprox = 1.0f - twooverpi * vx.f; 457 | 458 | return qpprox + p * qpprox * (1.0f - qpprox * qpprox); 459 | } 460 | 461 | float 462 | fastcosfull (float x) 463 | { 464 | static const float halfpi = 1.5707963267948966f; 465 | return fastsinfull (x + halfpi); 466 | } 467 | 468 | float 469 | fastercosfull (float x) 470 | { 471 | static const float halfpi = 1.5707963267948966f; 472 | return fastersinfull (x + halfpi); 473 | } 474 | 475 | float 476 | fasttan (float x) 477 | { 478 | static const float halfpi = 1.5707963267948966f; 479 | return fastsin (x) / fastsin (x + halfpi); 480 | } 481 | 482 | float 483 | fastertan (float x) 484 | { 485 | return fastersin (x) / fastercos (x); 486 | } 487 | 488 | float 489 | fasttanfull (float x) 490 | { 491 | static const float twopi = 6.2831853071795865f; 492 | static const float invtwopi = 0.15915494309189534f; 493 | 494 | int k = x * invtwopi; 495 | float half = (x < 0) ? 
-0.5f : 0.5f; 496 | float xnew = x - (half + k) * twopi; 497 | 498 | return fastsin (xnew) / fastcos (xnew); 499 | } 500 | 501 | float 502 | fastertanfull (float x) 503 | { 504 | static const float twopi = 6.2831853071795865f; 505 | static const float invtwopi = 0.15915494309189534f; 506 | 507 | int k = x * invtwopi; 508 | float half = (x < 0) ? -0.5f : 0.5f; 509 | float xnew = x - (half + k) * twopi; 510 | 511 | return fastersin (xnew) / fastercos (xnew); 512 | } 513 | -------------------------------------------------------------------------------- /fastapprox_tests/tests/c/mod.rs: -------------------------------------------------------------------------------- 1 | macro_rules! extern_unsafe_wraps { 2 | ( 3 | $( $fn:ident, )* 4 | ) => { 5 | mod cc { 6 | extern { 7 | $( 8 | pub fn $fn(x: f32) -> f32; 9 | )* 10 | } 11 | } 12 | 13 | $( 14 | #[inline] 15 | pub fn $fn(x: f32) -> f32 { 16 | unsafe { cc::$fn(x) } 17 | } 18 | )* 19 | } 20 | } 21 | 22 | extern_unsafe_wraps! { 23 | // fast 24 | fastpow2, 25 | fastlog2, 26 | fastlog, 27 | fastexp, 28 | fastsigmoid, 29 | fastlgamma, 30 | fastdigamma, 31 | fasterf, 32 | fasterfc, 33 | fastinverseerf, 34 | fastsinh, 35 | fastcosh, 36 | fasttanh, 37 | fastlambertw, 38 | fastlambertwexpx, 39 | fastsin, 40 | fastcos, 41 | fasttan, 42 | fastsinfull, 43 | fastcosfull, 44 | fasttanfull, 45 | // faster 46 | fasterpow2, 47 | fasterlog2, 48 | fasterlog, 49 | fasterexp, 50 | fastersigmoid, 51 | fasterlgamma, 52 | fasterdigamma, 53 | fastererf, 54 | fastererfc, 55 | fasterinverseerf, 56 | fastersinh, 57 | fastercosh, 58 | fastertanh, 59 | fasterlambertw, 60 | fasterlambertwexpx, 61 | fastersin, 62 | fastercos, 63 | fastertan, 64 | fastersinfull, 65 | fastercosfull, 66 | fastertanfull, 67 | } 68 | -------------------------------------------------------------------------------- /fastapprox_tests/tests/compare.rs: -------------------------------------------------------------------------------- 1 | #![cfg(test)] 2 | 3 | extern crate 
fastapprox; 4 | extern crate special; 5 | extern crate statrs; 6 | 7 | use fastapprox::{fast, faster}; 8 | use statrs::function::erf; 9 | use std::convert::Into; 10 | 11 | mod c; 12 | 13 | const FLOATS: &[f32] = &[-5.0, -0.25, -0.05, 0.0, 0.05, 1.0, 2.0, 3.0, 10.0]; 14 | const POS_FLOATS: &[f32] = &[0.01, 0.05, 1.0, 2.1, 3.5, 100.0]; 15 | const BETWEEN_ONES: &[f32] = &[-0.9, -0.5, -0.1, -0.01, 0.0, 0.01, 0.1, 0.5, 0.9]; 16 | const BETWEEN_PIS: &[f32] = &[ 17 | -3.14, -1.5, -1.0, -0.5, -0.1, -0.01, 0.0, 0.01, 0.1, 0.5, 1.0, 1.5, 3.14, 18 | ]; 19 | const BETWEEN_HALFPIS: &[f32] = &[ 20 | -1.56, -1.5, -1.0, -0.5, -0.1, -0.01, 0.0, 0.01, 0.1, 0.5, 1.0, 1.5, 1.56, 21 | ]; 22 | 23 | fn compare(func: F1, base: F2, values: &[f32], tolerance: T) 24 | where 25 | F1: Fn(f32) -> f32, 26 | F2: Fn(f32) -> f32, 27 | T: Into>, 28 | { 29 | let tol = tolerance.into(); 30 | for value in values { 31 | let r1 = func(*value); 32 | let r2 = base(*value); 33 | if let Some(tolerance) = tol { 34 | let d = if r2.abs() < 0.1 { 35 | (r1 - r2).abs() 36 | } else { 37 | ((r1 - r2) / r2).abs() 38 | }; 39 | assert!( 40 | d < tolerance, 41 | "func({}) = {}, but base({}) = {}, Δ == {}", 42 | value, 43 | r1, 44 | value, 45 | r2, 46 | d 47 | ); 48 | } else { 49 | assert_eq!( 50 | r1, r2, 51 | "func({}) = {}, but base({}) = {}", 52 | value, r1, value, r2 53 | ); 54 | } 55 | } 56 | } 57 | 58 | fn compare_exact(func: F1, base: F2, values: &[f32]) 59 | where 60 | F1: Fn(f32) -> f32, 61 | F2: Fn(f32) -> f32, 62 | { 63 | compare(func, base, values, None); 64 | } 65 | 66 | fn compare_near(func: F1, base: F2, values: &[f32]) 67 | where 68 | F1: Fn(f32) -> f32, 69 | F2: Fn(f32) -> f32, 70 | { 71 | compare(func, base, values, 0.01); 72 | } 73 | 74 | fn compare_far(func: F1, base: F2, values: &[f32]) 75 | where 76 | F1: Fn(f32) -> f32, 77 | F2: Fn(f32) -> f32, 78 | { 79 | compare(func, base, values, 0.15); 80 | } 81 | 82 | #[test] 83 | fn test_pow2_approx() { 84 | compare_exact(fast::pow2, c::fastpow2, FLOATS); 
85 | compare_exact(faster::pow2, c::fasterpow2, FLOATS); 86 | } 87 | 88 | #[test] 89 | fn test_pow2_exact() { 90 | compare_near(fast::pow2, |x| (2.0_f32).powf(x), FLOATS); 91 | compare_far(faster::pow2, |x| (2.0_f32).powf(x), FLOATS); 92 | } 93 | 94 | #[test] 95 | fn test_log2_approx() { 96 | compare_exact(fast::log2, c::fastlog2, POS_FLOATS); 97 | compare_exact(faster::log2, c::fasterlog2, POS_FLOATS); 98 | } 99 | 100 | #[test] 101 | fn test_log2_exact() { 102 | compare_near(fast::log2, f32::log2, POS_FLOATS); 103 | compare_far(faster::log2, f32::log2, POS_FLOATS); 104 | } 105 | 106 | #[test] 107 | fn test_ln_approx() { 108 | compare_exact(fast::ln, c::fastlog, POS_FLOATS); 109 | compare_exact(faster::ln, c::fasterlog, POS_FLOATS); 110 | } 111 | 112 | #[test] 113 | fn test_ln_exact() { 114 | compare_near(fast::ln, f32::ln, POS_FLOATS); 115 | compare_far(faster::ln, f32::ln, POS_FLOATS); 116 | } 117 | 118 | #[test] 119 | fn test_exp_approx() { 120 | compare_exact(fast::exp, c::fastexp, FLOATS); 121 | compare_exact(faster::exp, c::fasterexp, FLOATS); 122 | } 123 | 124 | #[test] 125 | fn test_exp_exact() { 126 | compare_near(fast::exp, f32::exp, FLOATS); 127 | compare_far(faster::exp, f32::exp, FLOATS); 128 | } 129 | 130 | #[test] 131 | fn test_sigmoid_approx() { 132 | compare_exact(fast::sigmoid, c::fastsigmoid, FLOATS); 133 | compare_exact(faster::sigmoid, c::fastersigmoid, FLOATS); 134 | } 135 | 136 | #[test] 137 | fn test_sigmoid_exact() { 138 | compare_near(fast::sigmoid, |x| (1.0_f32 + (-x).exp()).recip(), FLOATS); 139 | compare_far(faster::sigmoid, |x| (1.0_f32 + (-x).exp()).recip(), FLOATS); 140 | } 141 | 142 | #[test] 143 | fn test_lgamma_approx() { 144 | compare_exact(fast::ln_gamma, c::fastlgamma, POS_FLOATS); 145 | compare_exact(faster::ln_gamma, c::fasterlgamma, POS_FLOATS); 146 | } 147 | 148 | #[test] 149 | fn test_lgamma_exact() { 150 | compare_near( 151 | fast::ln_gamma, 152 | |x| special::Gamma::ln_gamma(x as f64).0 as f32, 153 | POS_FLOATS, 154 | ); 
155 | compare_far( 156 | faster::ln_gamma, 157 | |x| special::Gamma::ln_gamma(x as f64).0 as f32, 158 | POS_FLOATS, 159 | ); 160 | } 161 | 162 | #[test] 163 | fn test_digamma_approx() { 164 | compare_exact(fast::digamma, c::fastdigamma, POS_FLOATS); 165 | compare_exact(faster::digamma, c::fasterdigamma, POS_FLOATS); 166 | } 167 | 168 | #[test] 169 | fn test_digamma_exact() { 170 | compare_near( 171 | fast::digamma, 172 | |x| special::Gamma::digamma(x as f64) as f32, 173 | POS_FLOATS, 174 | ); 175 | compare_far( 176 | faster::digamma, 177 | |x| special::Gamma::digamma(x as f64) as f32, 178 | POS_FLOATS, 179 | ); 180 | } 181 | 182 | #[test] 183 | fn test_erf_approx() { 184 | compare_exact(fast::erf, c::fasterf, POS_FLOATS); 185 | compare_exact(faster::erf, c::fastererf, POS_FLOATS); 186 | } 187 | 188 | #[test] 189 | fn test_erf_exact() { 190 | compare_near( 191 | fast::erf, 192 | |x| special::Error::error(x as f64) as f32, 193 | POS_FLOATS, 194 | ); 195 | compare_far( 196 | faster::erf, 197 | |x| special::Error::error(x as f64) as f32, 198 | POS_FLOATS, 199 | ); 200 | } 201 | 202 | #[test] 203 | fn test_erfc_approx() { 204 | compare_exact(fast::erfc, c::fasterfc, POS_FLOATS); 205 | compare_exact(faster::erfc, c::fastererfc, POS_FLOATS); 206 | } 207 | 208 | #[test] 209 | fn test_erfc_exact() { 210 | compare_near( 211 | fast::erfc, 212 | |x| special::Error::compl_error(x as f64) as f32, 213 | POS_FLOATS, 214 | ); 215 | compare_far( 216 | faster::erfc, 217 | |x| special::Error::compl_error(x as f64) as f32, 218 | POS_FLOATS, 219 | ); 220 | } 221 | 222 | #[test] 223 | fn test_inverse_erf_approx() { 224 | compare_exact(fast::erf_inv, c::fastinverseerf, BETWEEN_ONES); 225 | compare_exact(faster::erf_inv, c::fasterinverseerf, BETWEEN_ONES); 226 | } 227 | 228 | #[test] 229 | fn test_inverse_erf_exact() { 230 | compare_near( 231 | fast::erf_inv, 232 | |x| erf::erf_inv(x as f64) as f32, 233 | BETWEEN_ONES, 234 | ); 235 | compare_far( 236 | faster::erf_inv, 237 | |x| 
erf::erf_inv(x as f64) as f32, 238 | BETWEEN_ONES, 239 | ); 240 | } 241 | 242 | #[test] 243 | fn test_sinh_approx() { 244 | compare_exact(fast::sinh, c::fastsinh, BETWEEN_PIS); 245 | compare_exact(faster::sinh, c::fastersinh, BETWEEN_PIS); 246 | } 247 | 248 | #[test] 249 | fn test_sinh_exact() { 250 | compare_near(fast::sinh, f32::sinh, BETWEEN_PIS); 251 | compare_far(faster::sinh, f32::sinh, BETWEEN_PIS); 252 | } 253 | 254 | #[test] 255 | fn test_cosh_approx() { 256 | compare_exact(fast::cosh, c::fastcosh, BETWEEN_PIS); 257 | compare_exact(faster::cosh, c::fastercosh, BETWEEN_PIS); 258 | } 259 | 260 | #[test] 261 | fn test_cosh_exact() { 262 | compare_near(fast::cosh, f32::cosh, BETWEEN_PIS); 263 | compare_far(faster::cosh, f32::cosh, BETWEEN_PIS); 264 | } 265 | 266 | #[test] 267 | fn test_tanh_approx() { 268 | compare_exact(fast::tanh, c::fasttanh, FLOATS); 269 | compare_exact(faster::tanh, c::fastertanh, FLOATS); 270 | } 271 | 272 | #[test] 273 | fn test_tanh_exact() { 274 | compare_near(fast::tanh, f32::tanh, FLOATS); 275 | compare_far(faster::tanh, f32::tanh, FLOATS); 276 | } 277 | 278 | #[test] 279 | fn test_lambertw_approx() { 280 | compare_exact(fast::lambertw, c::fastlambertw, FLOATS); 281 | compare_exact(faster::lambertw, c::fasterlambertw, FLOATS); 282 | } 283 | 284 | #[test] 285 | fn test_lambertwexpx_approx() { 286 | compare_exact(fast::lambertwexpx, c::fastlambertwexpx, FLOATS); 287 | compare_exact(faster::lambertwexpx, c::fasterlambertwexpx, FLOATS); 288 | } 289 | 290 | #[test] 291 | fn test_sin_approx() { 292 | compare_exact(fast::sin, c::fastsin, BETWEEN_PIS); 293 | compare_exact(faster::sin, c::fastersin, BETWEEN_PIS); 294 | } 295 | 296 | #[test] 297 | fn test_sin_exact() { 298 | compare_near(fast::sin, f32::sin, BETWEEN_PIS); 299 | compare_far(faster::sin, f32::sin, BETWEEN_PIS); 300 | } 301 | 302 | #[test] 303 | fn test_sinfull_approx() { 304 | compare_exact(fast::sinfull, c::fastsinfull, FLOATS); 305 | compare_exact(faster::sinfull, 
c::fastersinfull, FLOATS); 306 | } 307 | 308 | #[test] 309 | fn test_sinfull_exact() { 310 | compare_near(fast::sinfull, f32::sin, FLOATS); 311 | compare_far(faster::sinfull, f32::sin, FLOATS); 312 | } 313 | 314 | #[test] 315 | fn test_cos_approx() { 316 | compare_exact(fast::cos, c::fastcos, BETWEEN_PIS); 317 | compare_exact(faster::cos, c::fastercos, BETWEEN_PIS); 318 | } 319 | 320 | #[test] 321 | fn test_cos_exact() { 322 | compare_near(fast::cos, f32::cos, BETWEEN_PIS); 323 | compare_far(faster::cos, f32::cos, BETWEEN_PIS); 324 | } 325 | 326 | #[test] 327 | fn test_cosfull_approx() { 328 | compare_exact(fast::cosfull, c::fastcosfull, FLOATS); 329 | compare_exact(faster::cosfull, c::fastercosfull, FLOATS); 330 | } 331 | 332 | #[test] 333 | fn test_cosfull_exact() { 334 | compare_near(fast::cosfull, f32::cos, FLOATS); 335 | compare_far(faster::cosfull, f32::cos, FLOATS); 336 | } 337 | 338 | #[test] 339 | fn test_tan_approx() { 340 | compare_exact(fast::tan, c::fasttan, BETWEEN_HALFPIS); 341 | compare_exact(faster::tan, c::fastertan, BETWEEN_HALFPIS); 342 | } 343 | 344 | #[test] 345 | fn test_tan_exact() { 346 | compare_near(fast::tan, f32::tan, BETWEEN_HALFPIS); 347 | compare_far(faster::tan, f32::tan, BETWEEN_HALFPIS); 348 | } 349 | 350 | #[test] 351 | fn test_tanfull_approx() { 352 | compare_exact(fast::tanfull, c::fasttanfull, FLOATS); 353 | compare_exact(faster::tanfull, c::fastertanfull, FLOATS); 354 | } 355 | 356 | #[test] 357 | fn test_tanfull_exact() { 358 | compare_near(fast::tanfull, f32::tan, FLOATS); 359 | compare_far(faster::tanfull, f32::tan, FLOATS); 360 | } 361 | --------------------------------------------------------------------------------