├── LICENSE ├── Makefile ├── README.md ├── common_test.go ├── go.mod ├── math.go ├── math_test.go ├── random.go ├── random_test.go ├── scale.go ├── scale_test.go ├── trig.go └── trig_test.go /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2019 Brad Erickson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | all: test bench 3 | 4 | .PHONY: test 5 | test: 6 | go test -v . 7 | 8 | .PHONY: bench 9 | bench: 10 | go test -bench=. 
-benchmem 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FastMath for Go 2 | 3 | > "I can make things very fast if they don’t have to be correct." — Russ Cox 4 | 5 | 8 and 16 bit math functions for when speed matters more than precision. 6 | Potential use cases include LED displays, 2D/3D graphics, and games. 7 | 8 | * Designed for use with [TinyGo][tinygo] and/or [WebAssembly][go-wasm]. 9 | * Based on the [FastLED][fastled] [lib8tion][lib8ation-src] library. 10 | 11 | [tinygo]:https://tinygo.org/ 12 | [go-wasm]:https://github.com/golang/go/wiki/WebAssembly 13 | [fastled]:http://fastled.io/ 14 | [lib8ation-src]:https://raw.githubusercontent.com/FastLED/FastLED/dcbf3993/lib8tion/trig8.h 15 | 16 | ## Functions 17 | 18 | * `Sin8()` / `Sin16()` 19 | * `Cos8()` / `Cos16()` 20 | * `Random8()` / `Random16()` 21 | * `Random8Limit()` / `Random16Limit()` 22 | * `Random8Range()` / `Random16Range()` 23 | * `Random16SetSeed()` / `Random16GetSeed()` / `Random16AddEntropy()` 24 | * `Scale8()` / `Scale8Video()` 25 | * `NScale8x3()` / `NScale8x3Video()` - Scale three 8 bit integers at the same time. 26 | * `Scale16()` / `Scale16By8()` 27 | * `QAdd8()` / `QSub8()` / `QMul8()` - Saturating non-overflowing math functions. 28 | * `Abs8()` 29 | * `Sqrt16()` 30 | 31 | Note: Functionality already well handled by the Go runtime has not been re-implemented. 32 | 33 | ## Approximation Error 34 | 35 | Computer-based math functions have an error delta versus the pure mathematical 36 | results. The Golang Standard Library's math functions are precise up to 64 bit 37 | floats. The math functions provided by this library sacrifice additional 38 | precision for speed by working with small integers. 
39 | 40 | * `Sin8()` - Max Error: 1.63%, Average Error: 0.78% 41 | * `Sin16()` - Max Error: 0.34%, Average Error: 0.19% 42 | 43 | ## Benchmarks 44 | 45 | Run on an Intel(R) Core(TM) i7-7600U CPU @ 2.80GHz. 46 | 47 | ```bash 48 | BenchmarkStdLibFallbackSqrt-4 20000000 50.7 ns/op 49 | BenchmarkStdLibDefaultSqrt-4 2000000000 0.30 ns/op 50 | BenchmarkSqrt16-4 200000000 9.12 ns/op 51 | BenchmarkStdLibRandom8-4 50000000 25.6 ns/op 52 | BenchmarkRandom8-4 1000000000 2.12 ns/op 53 | BenchmarkStdLibSin8-4 50000000 20.8 ns/op 54 | BenchmarkSin8-4 300000000 4.07 ns/op 55 | BenchmarkStdLibSin16-4 50000000 20.0 ns/op 56 | BenchmarkSin16-4 2000000000 0.94 ns/op 57 | ``` 58 | 59 | `Random8()`, `Sin8()` and `Sin16()` are significantly faster than using the 60 | equivalent Go Standard Library's Math package functions. 61 | 62 | `Sqrt16()` is compared against both the default compiled `math.Sqrt()` and a 63 | copy of the fallback Standard Library `sqrt()` function. The default version is 64 | optimized by the Go compiler into a single instruction on the AMD64 65 | architecture, so the fallback version is used for a fair comparison. 66 | 67 | ## TODO 68 | 69 | * Should `Sin8()` be a lookup table? Why is it 4ns/op vs `Sin16()`@0.9ns/op? 70 | * Add ARM assembly implementations from upstream, benchmark difference. 71 | 72 | ## License 73 | 74 | Licensed MIT 75 | 76 | © 2019 Brad Erickson 77 | 78 | Based on FastLED MIT-licensed code: 79 | 80 | © 2013 FastLED 81 | 82 | Parts of test-only BSD code: 83 | 84 | © 2009 The Go Authors 85 | -------------------------------------------------------------------------------- /common_test.go: -------------------------------------------------------------------------------- 1 | package fastmath_test 2 | 3 | // Global result variables to trick the compiler during benchmarks into not 4 | // optimizing the functions out. 
// Fast, efficient 8-bit math functions specifically
// designed for high-performance LED programming.

// Note: Only functions not provided by the Go runtime are implemented.

// QAdd8 adds one byte to another, saturating at 0xFF instead of wrapping.
// Accepts:
//   - i - first byte to add
//   - j - second byte to add
//
// Returns the sum of i and j, capped at 0xFF.
func QAdd8(i, j uint8) uint8 {
	// Widen to 16 bits so a carry out of the low byte stays visible.
	sum := uint16(i) + uint16(j)
	if sum > 0xFF {
		return 0xFF
	}
	return uint8(sum)
}

// QSub8 subtracts one byte from another, saturating at 0x00 instead of
// wrapping.
// Returns i - j with a floor of 0.
func QSub8(i, j uint8) uint8 {
	if j >= i {
		return 0
	}
	return i - j
}

// QMul8 performs saturating 8x8 bit multiplication, with 8 bit result.
// Returns the product of i * j, capped at 0xFF.
func QMul8(i, j uint8) uint8 {
	// The full product of two bytes always fits in 16 bits.
	product := uint16(i) * uint16(j)
	if product > 0xFF {
		return 0xFF
	}
	return uint8(product)
}

// Abs8 finds the absolute value of a signed 8-bit integer.
//
// The magnitude of -128 does not fit in an int8: negating it wraps around and
// yields -128 again. In keeping with the saturating helpers above, Abs8
// returns 127 for that input so the result is never negative.
func Abs8(i int8) int8 {
	switch {
	case i == -128:
		return 127
	case i < 0:
		return -i
	}
	return i
}

// Sqrt16 finds the integer square root of a 16-bit integer, that is the
// largest r for which r*r <= x.
//
// The result is found with a binary search over the 8-bit candidates using
// only integer additions, shifts and multiplications. This is significantly
// slower than math.Sqrt on CPUs where the compiler turns math.Sqrt into a
// single hardware instruction (e.g. Intel/AMD); it is intended for targets
// such as microcontrollers without that instruction.
func Sqrt16(x uint16) uint8 {
	if x <= 1 {
		return uint8(x)
	}

	low := uint8(1)  // lower bound of the search window
	hi := uint8(255) // upper bound of the search window
	if x <= 7904 {
		// x/32 + 8 is a cheap initial estimate for the upper bound; for
		// x <= 7904 it is at most 255 and therefore fits in a byte.
		hi = uint8((x >> 5) + 8)
	}

	for hi >= low {
		mid := uint8((uint16(low) + uint16(hi)) >> 1)
		if uint16(mid)*uint16(mid) > x {
			hi = mid - 1
			continue
		}
		if mid == 255 {
			// 255 is the largest possible result; mid + 1 would wrap to 0.
			return 255
		}
		low = mid + 1
	}

	return low - 1
}
| in: 10000, 62 | expected: 100, 63 | }} 64 | for _, test := range testCases { 65 | name := fmt.Sprintf("Sqrt16(%d)", test.in) 66 | t.Run(name, func(t *testing.T) { 67 | r := fastmath.Sqrt16(test.in) 68 | if test.expected != r { 69 | t.Fatalf("expected: %d, found: %d", test.expected, r) 70 | } 71 | }) 72 | } 73 | } 74 | 75 | const ( 76 | mask = 0x7FF 77 | shift = 64 - 11 - 1 78 | bias = 1023 79 | ) 80 | 81 | // Copied from https://golang.org/src/math/sqrt.go to avoid compiler 82 | // optimizations into a single assembly instruction on many architectures. 83 | // Copyright 2009 The Go Authors. All rights reserved. 84 | func sqrt(x float64) float64 { 85 | // special cases 86 | switch { 87 | case x == 0 || math.IsNaN(x) || math.IsInf(x, 1): 88 | return x 89 | case x < 0: 90 | return math.NaN() 91 | } 92 | ix := math.Float64bits(x) 93 | // normalize x 94 | exp := int((ix >> shift) & mask) 95 | if exp == 0 { // subnormal x 96 | for ix&(1<>= 1 // exp = exp/2, exponent of square root 109 | // generate sqrt(x) bit by bit 110 | ix <<= 1 111 | var q, s uint64 // q = sqrt(x) 112 | r := uint64(1 << (shift + 1)) // r = moving bit from MSB to LSB 113 | for r != 0 { 114 | t := s + r 115 | if t <= ix { 116 | s = t + r 117 | ix -= t 118 | q += r 119 | } 120 | ix <<= 1 121 | r >>= 1 122 | } 123 | // final rounding 124 | if ix != 0 { // remainder, result not exact 125 | q += q & 1 // round according to extra bit 126 | } 127 | ix = q>>1 + uint64(exp-1+bias)<>8)) 21 | } 22 | 23 | // Random16 generates a 16 bit random number. 24 | func Random16() uint16 { 25 | rand16seed = (rand16seed * rand2053) + rand13849 26 | return rand16seed 27 | } 28 | 29 | // Random8Limit generates an 8-bit random number between 0 and lim. 
30 | // Accept lim the upper bound for the result 31 | func Random8Limit(lim uint8) uint8 { 32 | if lim == 0 { 33 | return 0 34 | } 35 | r := Random8() 36 | r = uint8((uint16(r) * uint16(lim)) >> 8) 37 | return r 38 | } 39 | 40 | // Random8Range generates an 8-bit random number in the given range. 41 | // Accepts min the lower bound for the random number 42 | // Accepts lim the upper bound for the random number 43 | func Random8Range(min, lim uint8) uint8 { 44 | if min > lim { 45 | return lim 46 | } 47 | delta := lim - min 48 | r := Random8Limit(delta) + min 49 | return r 50 | } 51 | 52 | // Random16Limit generates an 16-bit random number between 0 and lim. 53 | // Accepts lim the upper bound for the result 54 | func Random16Limit(lim uint16) uint16 { 55 | if lim == 0 { 56 | return 0 57 | } 58 | r := Random16() 59 | p := uint32(lim) * uint32(r) 60 | r = uint16(p >> 16) 61 | return uint16(r) 62 | } 63 | 64 | // Random16Range generates an 16-bit random number in the given range. 65 | // Accept min the lower bound for the random number 66 | // Return lim the upper bound for the random number 67 | func Random16Range(min, lim uint16) uint16 { 68 | delta := lim - min 69 | r := Random16Limit(delta) + min 70 | return r 71 | } 72 | 73 | // Random16SetSeed sets the 16-bit seed used for the random number generator. 74 | func Random16SetSeed(seed uint16) { 75 | rand16seed = seed 76 | } 77 | 78 | // Random16GetSeed gets the current seed value for the random number generator. 79 | func Random16GetSeed() uint16 { 80 | return rand16seed 81 | } 82 | 83 | // Random16AddEntropy adds entropy into the random number generator. 
84 | func Random16AddEntropy(entropy uint16) { 85 | rand16seed += entropy 86 | } 87 | -------------------------------------------------------------------------------- /random_test.go: -------------------------------------------------------------------------------- 1 | package fastmath_test 2 | 3 | import ( 4 | "math/rand" 5 | "testing" 6 | 7 | "github.com/13rac1/fastmath" 8 | ) 9 | 10 | func TestRandom8(t *testing.T) { 11 | t.Run("SetSeed", func(t *testing.T) { 12 | fastmath.Random16SetSeed(0) 13 | 14 | a := fastmath.Random8() 15 | fastmath.Random16SetSeed(0) 16 | b := fastmath.Random8() 17 | if a != b { 18 | t.Fatal("same seed random did not match") 19 | } 20 | }) 21 | t.Run("Limit", func(t *testing.T) { 22 | for lim := 1; lim < 256; lim++ { 23 | for x := 0; x < 1000; x++ { 24 | if fastmath.Random8Limit(uint8(lim)) > uint8(lim) { 25 | t.Fatalf("over limit: %d", lim) 26 | } 27 | } 28 | } 29 | }) 30 | t.Run("Range", func(t *testing.T) { 31 | for min := 1; min < 256; min++ { 32 | for lim := min; lim < 256; lim++ { 33 | for x := 0; x < 1000; x++ { 34 | v := fastmath.Random8Range(uint8(min), uint8(lim)) 35 | if v < uint8(min) { 36 | t.Fatalf("under min: %d", min) 37 | } 38 | if v > uint8(lim) { 39 | t.Fatalf("over limit: %d", lim) 40 | } 41 | } 42 | } 43 | } 44 | 45 | if fastmath.Random8Range(10, 0) == 10 { 46 | t.Fatalf("expected limit, found min") 47 | } 48 | }) 49 | } 50 | 51 | func TestRandom16(t *testing.T) { 52 | t.Run("Seed", func(t *testing.T) { 53 | fastmath.Random16SetSeed(0) 54 | a := fastmath.Random16() 55 | fastmath.Random16SetSeed(0) 56 | b := fastmath.Random16() 57 | if a != b { 58 | t.Fatal("same seed random did not match") 59 | } 60 | }) 61 | t.Run("Limit", func(t *testing.T) { 62 | for lim := 1; lim < 65535; lim++ { 63 | for x := 0; x < 1000; x++ { 64 | if fastmath.Random16Limit(uint16(lim)) > uint16(lim) { 65 | t.Fatalf("over limit: %d", lim) 66 | } 67 | } 68 | } 69 | }) 70 | t.Run("Range", func(t *testing.T) { 71 | for min := 1; min < 65535; min += 
256 { 72 | for lim := min; lim < 65535; lim += 256 { 73 | for x := 0; x < 1000; x++ { 74 | v := fastmath.Random16Range(uint16(min), uint16(lim)) 75 | if v < uint16(min) { 76 | t.Fatalf("under min: %d", min) 77 | } 78 | if v > uint16(lim) { 79 | t.Fatalf("over limit: %d", lim) 80 | } 81 | } 82 | } 83 | } 84 | 85 | if fastmath.Random16Range(10, 0) == 10 { 86 | t.Fatalf("expected limit, found min") 87 | } 88 | }) 89 | 90 | } 91 | 92 | func BenchmarkStdLibRandom8(b *testing.B) { 93 | var r uint8 94 | for n := 0; n < b.N; n++ { 95 | r = uint8(rand.Intn(255)) 96 | } 97 | result8 = r 98 | } 99 | 100 | func BenchmarkRandom8(b *testing.B) { 101 | var r uint8 102 | for n := 0; n < b.N; n++ { 103 | r = fastmath.Random8Limit(255) 104 | } 105 | result8 = r 106 | } 107 | -------------------------------------------------------------------------------- /scale.go: -------------------------------------------------------------------------------- 1 | package fastmath 2 | 3 | // Fast, efficient 8-bit scaling functions specifically 4 | // designed for high-performance LED programming. 5 | 6 | // Scale8 scales one byte by a second one, which is treated as the numerator of 7 | // a fraction whose denominator is 256. In other words, it computes i * (scale / 8 | // 256) 9 | func Scale8(i uint8, scale uint8) uint8 { 10 | return uint8((uint16(i) * (1 + uint16(scale))) >> 8) 11 | } 12 | 13 | // Scale8Video is the "video" version of Scale8. Guarantees the output will be 14 | // only be zero if one or both of the inputs are zero. If both inputs are 15 | // non-zero, the output is guaranteed to be non-zero. This makes for better 16 | // 'video'/LED dimming, at the cost of several additional cycles. 
func Scale8Video(i, scale uint8) uint8 {
	scaled := (uint16(i) * uint16(scale)) >> 8
	if i != 0 && scale != 0 {
		// Both inputs are non-zero, so the result must be too: bump it by
		// one. The shifted product is at most 254, so this cannot overflow.
		scaled++
	}
	return uint8(scaled)
}

// NScale8x3 scales three one byte values in place by a fourth one, which is
// treated as the numerator of a fraction whose denominator is 256. In other
// words, it computes r,g,b * (scale / 256).
func NScale8x3(r, g, b *uint8, scale uint8) {
	// scale + 1 lets a scale of 255 leave the inputs unchanged.
	factor := uint16(scale) + 1
	*r = uint8((uint16(*r) * factor) >> 8)
	*g = uint8((uint16(*g) * factor) >> 8)
	*b = uint8((uint16(*b) * factor) >> 8)
}

// NScale8x3Video scales three one byte values in place by a fourth one, which
// is treated as the numerator of a fraction whose denominator is 256. In other
// words, it computes r,g,b * (scale / 256), ensuring that non-zero values
// passed in remain non-zero unless scale itself is zero.
func NScale8x3Video(r, g, b *uint8, scale uint8) {
	// Each channel follows exactly the Scale8Video rule, so reuse it instead
	// of repeating the formula three times.
	*r = Scale8Video(*r, scale)
	*g = Scale8Video(*g, scale)
	*b = Scale8Video(*b, scale)
}

// Scale16By8 scales a 16-bit unsigned value by an 8-bit value, considered as
// numerator of a fraction whose denominator is 256. In other words, it computes
// i * (scale / 256).
func Scale16By8(i uint16, scale uint8) uint16 {
	// Widen to 32 bits: the product of a 16-bit value and up to 256 needs 24.
	return uint16((uint32(i) * (uint32(scale) + 1)) >> 8)
}

// Scale16 scales a 16-bit unsigned value by a 16-bit value, considered as
// numerator of a fraction whose denominator is 65536. In other words, it
// computes i * (scale / 65536).
65 | func Scale16(i, scale uint16) uint16 { 66 | return uint16((uint32(i) * (1 + uint32(scale))) / 65536) 67 | } 68 | -------------------------------------------------------------------------------- /scale_test.go: -------------------------------------------------------------------------------- 1 | package fastmath_test 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/13rac1/fastmath" 8 | ) 9 | 10 | func TestScale8(t *testing.T) { 11 | testCases := []struct { 12 | in uint8 13 | scale uint8 14 | expected uint8 15 | }{{ 16 | in: 0, 17 | scale: 0, 18 | expected: 0, 19 | }, { 20 | in: 0, 21 | scale: 255, 22 | expected: 0, 23 | }, { 24 | in: 255, 25 | scale: 0, 26 | expected: 0, 27 | }, { 28 | in: 255, 29 | scale: 255, 30 | expected: 255, 31 | }, { 32 | in: 255, 33 | scale: 128, 34 | expected: 128, 35 | }, { 36 | in: 128, 37 | scale: 255, 38 | expected: 128, 39 | }, { 40 | in: 128, 41 | scale: 128, 42 | expected: 64, 43 | }, { 44 | in: 64, 45 | scale: 128, 46 | expected: 32, 47 | }, { 48 | in: 1, 49 | scale: 1, 50 | expected: 0, // Primary difference vs Scale8Video 51 | }} 52 | 53 | for _, test := range testCases { 54 | name := fmt.Sprintf("Scale8(%d,%d)", test.in, test.scale) 55 | t.Run(name, func(t *testing.T) { 56 | r := fastmath.Scale8(test.in, test.scale) 57 | if test.expected != r { 58 | t.Fatalf("expected: %d, found: %d", test.expected, r) 59 | } 60 | }) 61 | 62 | name = fmt.Sprintf("NScale8x3(%d,%d,%d,%d)", test.in, test.in, test.in, test.scale) 63 | t.Run(name, func(t *testing.T) { 64 | r := test.in 65 | g := test.in 66 | b := test.in 67 | fastmath.NScale8x3(&r, &g, &b, test.scale) 68 | if test.expected != r || test.expected != g || test.expected != b { 69 | t.Fatalf("expected: %d, found: (%d,%d,%d)", test.expected, r, g, b) 70 | } 71 | }) 72 | } 73 | } 74 | 75 | func TestScale8Video(t *testing.T) { 76 | testCases := []struct { 77 | in uint8 78 | scale uint8 79 | expected uint8 80 | }{{ 81 | in: 0, 82 | scale: 0, 83 | expected: 0, 84 | }, { 85 | in: 
0, 86 | scale: 255, 87 | expected: 0, 88 | }, { 89 | in: 255, 90 | scale: 0, 91 | expected: 0, 92 | }, { 93 | in: 255, 94 | scale: 255, 95 | expected: 255, 96 | }, { 97 | in: 255, 98 | scale: 128, 99 | expected: 128, 100 | }, { 101 | in: 128, 102 | scale: 255, 103 | expected: 128, 104 | }, { 105 | in: 128, 106 | scale: 128, 107 | expected: 65, 108 | }, { 109 | in: 64, 110 | scale: 128, 111 | expected: 33, 112 | }, { 113 | in: 1, 114 | scale: 1, 115 | expected: 1, // Primary difference vs Scale8 116 | }} 117 | 118 | for _, test := range testCases { 119 | name := fmt.Sprintf("Scale8Video(%d,%d)", test.in, test.scale) 120 | t.Run(name, func(t *testing.T) { 121 | r := fastmath.Scale8Video(test.in, test.scale) 122 | if test.expected != r { 123 | t.Fatalf("expected: %d, found: %d", test.expected, r) 124 | } 125 | }) 126 | 127 | name = fmt.Sprintf("NScale8x3Video(%d,%d,%d,%d)", test.in, test.in, test.in, test.scale) 128 | t.Run(name, func(t *testing.T) { 129 | r := test.in 130 | g := test.in 131 | b := test.in 132 | fastmath.NScale8x3Video(&r, &g, &b, test.scale) 133 | if test.expected != r || test.expected != g || test.expected != b { 134 | t.Fatalf("expected: %d, found: (%d,%d,%d)", test.expected, r, g, b) 135 | } 136 | }) 137 | } 138 | } 139 | 140 | func TestScale16By8(t *testing.T) { 141 | testCases := []struct { 142 | in uint16 143 | scale uint8 144 | expected uint16 145 | }{{ 146 | in: 0, 147 | scale: 0, 148 | expected: 0, 149 | }, { 150 | in: 0, 151 | scale: 255, 152 | expected: 0, 153 | }, { 154 | in: 255, 155 | scale: 0, 156 | expected: 0, 157 | }, { 158 | in: 255, 159 | scale: 255, 160 | expected: 255, 161 | }, { 162 | in: 65535, 163 | scale: 255, 164 | expected: 65535, 165 | }, { 166 | in: 65535, 167 | scale: 127, 168 | expected: 32767, 169 | }} 170 | 171 | for _, test := range testCases { 172 | name := fmt.Sprintf("Scale16By8(%d,%d)", test.in, test.scale) 173 | t.Run(name, func(t *testing.T) { 174 | r := fastmath.Scale16By8(test.in, test.scale) 175 | if 
test.expected != r { 176 | t.Fatalf("expected: %d, found: %d", test.expected, r) 177 | } 178 | }) 179 | } 180 | } 181 | 182 | func TestScale16(t *testing.T) { 183 | testCases := []struct { 184 | in uint16 185 | scale uint16 186 | expected uint16 187 | }{{ 188 | in: 0, 189 | scale: 0, 190 | expected: 0, 191 | }, { 192 | in: 0, 193 | scale: 65535, 194 | expected: 0, 195 | }, { 196 | in: 65535, 197 | scale: 0, 198 | expected: 0, 199 | }, { 200 | in: 255, 201 | scale: 65535, 202 | expected: 255, 203 | }, { 204 | in: 65535, 205 | scale: 255, 206 | expected: 255, 207 | }, { 208 | in: 65535, 209 | scale: 32767, 210 | expected: 32767, 211 | }, { 212 | in: 256, 213 | scale: 32767, 214 | expected: 128, 215 | }} 216 | 217 | for _, test := range testCases { 218 | name := fmt.Sprintf("Scale16By8(%d,%d)", test.in, test.scale) 219 | t.Run(name, func(t *testing.T) { 220 | r := fastmath.Scale16(test.in, test.scale) 221 | if test.expected != r { 222 | t.Fatalf("expected: %d, found: %d", test.expected, r) 223 | } 224 | }) 225 | } 226 | } 227 | -------------------------------------------------------------------------------- /trig.go: -------------------------------------------------------------------------------- 1 | package fastmath 2 | 3 | // Trig Fast trig functions 4 | // 5 | // Fast 8 and 16-bit approximations of sin(x) and cos(x). 6 | // Don't use these approximations for calculating the 7 | // Trajectory of a rocket to Mars, but they're great 8 | // for art projects and LED displays. 9 | 10 | // PI8 is the value of π in 8 bit math. 11 | const PI8 uint8 = 127 12 | 13 | // PI16 is the value of π in 16 bit math. 14 | const PI16 uint16 = 32768 15 | 16 | var bM16Interleave = []uint8{0, 49, 49, 41, 90, 27, 117, 10} 17 | 18 | // Sin8 is a fast 8-bit approximation of sin(x). This approximation never varies 19 | // more than 2% from the floating point value. 20 | // 21 | // This is a Golang translation of the FastLED lib8tion sin8_C() function. 
22 | // https://raw.githubusercontent.com/FastLED/FastLED/dcbf3993/lib8tion/trig8.h 23 | // 24 | // Accepts theta input angle from 0-255. 25 | // Returns sin of theta, value between 0 and 255 26 | func Sin8(theta uint8) uint8 { 27 | offset := theta 28 | if theta&0x40 != 0 { 29 | offset = 255 - offset 30 | } 31 | offset &= 0x3F // 0..63 32 | 33 | secoffset := offset & 0x0F // 0..15 34 | if theta&0x40 != 0 { 35 | secoffset++ 36 | } 37 | 38 | section := offset >> 4 // 0..3 39 | s2 := section * 2 40 | 41 | var p uint8 42 | p += s2 43 | b := bM16Interleave[p] 44 | p++ 45 | m16 := bM16Interleave[p] 46 | 47 | // Must be cast int16 so multiplication can result be greater than 255. 48 | mx := (int16(m16) * int16(secoffset)) >> 4 49 | 50 | // Must be int16 to be negative and allow adding 128. 51 | var y int16 52 | y = mx + int16(b) 53 | if theta&0x80 != 0 { 54 | y = -y 55 | } 56 | 57 | y += 128 58 | return uint8(y) 59 | } 60 | 61 | // Cos8 is a fast 8-bit approximation of cos(x). This approximation never varies 62 | // more than 2% from the floating point value. 63 | // 64 | // Accepts theta input angle from 0-255 65 | // Returns sin of theta, value between 0 and 255 66 | func Cos8(theta uint8) uint8 { 67 | return Sin8(theta + 64) 68 | } 69 | 70 | var base = []uint16{0, 6393, 12539, 18204, 23170, 27245, 30273, 32137} 71 | var slope = []uint8{49, 48, 44, 38, 31, 23, 14, 4} 72 | 73 | // Sin16 is a fast 16-bit approximation of sin(x). This approximation never 74 | // varies more than 0.69% from the floating point value. 75 | // 76 | // This is a Golang translation of the FastLED lib8tion sin16_C() function. 77 | // https://raw.githubusercontent.com/FastLED/FastLED/dcbf3993/lib8tion/trig8.h 78 | // 79 | // Accepts theta input angle from 0-65535. 80 | // Returns sin of theta, value between -32767 to 32767. 
81 | func Sin16(theta uint16) int16 { 82 | offset := (theta & 0x3FFF) >> 3 // 0..2047 83 | if theta&0x4000 != 0 { 84 | offset = 2047 - offset 85 | } 86 | 87 | section := offset / 256 // 0..7 88 | b := base[section] 89 | m := slope[section] 90 | 91 | secoffset8 := uint8(offset) / 2 92 | 93 | mx := int16(m) * int16(secoffset8) 94 | y := int16(mx) + int16(b) 95 | 96 | if theta&0x8000 != 0 { 97 | y = -y 98 | } 99 | 100 | return y 101 | } 102 | 103 | // Cos16 is a fast 16-bit approximation of cos(x). This approximation never 104 | // varies more than 0.69% from the floating point value. 105 | // 106 | // Accepts theta input angle from 0-65535. 107 | // Returns cos of theta, value between -32767 and 32767. 108 | func Cos16(theta uint16) int16 { 109 | return Sin16(theta + 16384) 110 | } 111 | -------------------------------------------------------------------------------- /trig_test.go: -------------------------------------------------------------------------------- 1 | package fastmath_test 2 | 3 | import ( 4 | "math" 5 | "testing" 6 | 7 | "github.com/13rac1/fastmath" 8 | ) 9 | 10 | // Complete range of output from the FastLED sin8_C() function. 
11 | // int main() { 12 | // for (int x = 0; x < 256; x++) { 13 | // printf("%d,", sin8(x)); 14 | // if (x%8 == 7) { 15 | // printf("\n"); 16 | // } 17 | // } 18 | var fastLEDSin8 = [256]uint8{ 19 | 128, 131, 134, 137, 140, 143, 146, 149, 20 | 152, 155, 158, 161, 164, 167, 170, 173, 21 | 177, 179, 182, 184, 187, 189, 192, 194, 22 | 197, 200, 202, 205, 207, 210, 212, 215, 23 | 218, 219, 221, 223, 224, 226, 228, 229, 24 | 231, 233, 234, 236, 238, 239, 241, 243, 25 | 245, 245, 246, 246, 247, 248, 248, 249, 26 | 250, 250, 251, 251, 252, 253, 253, 254, 27 | 255, 254, 253, 253, 252, 251, 251, 250, 28 | 250, 249, 248, 248, 247, 246, 246, 245, 29 | 245, 243, 241, 239, 238, 236, 234, 233, 30 | 231, 229, 228, 226, 224, 223, 221, 219, 31 | 218, 215, 212, 210, 207, 205, 202, 200, 32 | 197, 194, 192, 189, 187, 184, 182, 179, 33 | 177, 173, 170, 167, 164, 161, 158, 155, 34 | 152, 149, 146, 143, 140, 137, 134, 131, 35 | 128, 125, 122, 119, 116, 113, 110, 107, 36 | 104, 101, 98, 95, 92, 89, 86, 83, 37 | 79, 77, 74, 72, 69, 67, 64, 62, 38 | 59, 56, 54, 51, 49, 46, 44, 41, 39 | 38, 37, 35, 33, 32, 30, 28, 27, 40 | 25, 23, 22, 20, 18, 17, 15, 13, 41 | 11, 11, 10, 10, 9, 8, 8, 7, 42 | 6, 6, 5, 5, 4, 3, 3, 2, 43 | 1, 2, 3, 3, 4, 5, 5, 6, 44 | 6, 7, 8, 8, 9, 10, 10, 11, 45 | 11, 13, 15, 17, 18, 20, 22, 23, 46 | 25, 27, 28, 30, 32, 33, 35, 37, 47 | 38, 41, 44, 46, 49, 51, 54, 56, 48 | 59, 62, 64, 67, 69, 72, 74, 77, 49 | 79, 83, 86, 89, 92, 95, 98, 101, 50 | 104, 107, 110, 113, 116, 119, 122, 125, 51 | } 52 | 53 | // Range of output from the FastLED sin16_C() function. 
54 | // for (int x = 0; x < 65535; x+=256) { 55 | // printf("%d,", sin16(x)); 56 | // if (x%2048 == 1792) { 57 | // printf("\n"); 58 | // } 59 | // } 60 | var fastLEDSin16 = [256]int16{ 61 | 0, 784, 1568, 2352, 3136, 3920, 4704, 5488, 62 | 6393, 7161, 7929, 8697, 9465, 10233, 11001, 11769, 63 | 12539, 13243, 13947, 14651, 15355, 16059, 16763, 17467, 64 | 18204, 18812, 19420, 20028, 20636, 21244, 21852, 22460, 65 | 23170, 23666, 24162, 24658, 25154, 25650, 26146, 26642, 66 | 27245, 27613, 27981, 28349, 28717, 29085, 29453, 29821, 67 | 30273, 30497, 30721, 30945, 31169, 31393, 31617, 31841, 68 | 32137, 32201, 32265, 32329, 32393, 32457, 32521, 32585, 69 | 32645, 32581, 32517, 32453, 32389, 32325, 32261, 32197, 70 | 32051, 31827, 31603, 31379, 31155, 30931, 30707, 30483, 71 | 30166, 29798, 29430, 29062, 28694, 28326, 27958, 27590, 72 | 27107, 26611, 26115, 25619, 25123, 24627, 24131, 23635, 73 | 23030, 22422, 21814, 21206, 20598, 19990, 19382, 18774, 74 | 18127, 17423, 16719, 16015, 15311, 14607, 13903, 13199, 75 | 12489, 11721, 10953, 10185, 9417, 8649, 7881, 7113, 76 | 6223, 5439, 4655, 3871, 3087, 2303, 1519, 735, 77 | 0, -784, -1568, -2352, -3136, -3920, -4704, -5488, 78 | -6393, -7161, -7929, -8697, -9465, -10233, -11001, -11769, 79 | -12539, -13243, -13947, -14651, -15355, -16059, -16763, -17467, 80 | -18204, -18812, -19420, -20028, -20636, -21244, -21852, -22460, 81 | -23170, -23666, -24162, -24658, -25154, -25650, -26146, -26642, 82 | -27245, -27613, -27981, -28349, -28717, -29085, -29453, -29821, 83 | -30273, -30497, -30721, -30945, -31169, -31393, -31617, -31841, 84 | -32137, -32201, -32265, -32329, -32393, -32457, -32521, -32585, 85 | -32645, -32581, -32517, -32453, -32389, -32325, -32261, -32197, 86 | -32051, -31827, -31603, -31379, -31155, -30931, -30707, -30483, 87 | -30166, -29798, -29430, -29062, -28694, -28326, -27958, -27590, 88 | -27107, -26611, -26115, -25619, -25123, -24627, -24131, -23635, 89 | -23030, -22422, -21814, -21206, -20598, -19990, 
-19382, -18774, 90 | -18127, -17423, -16719, -16015, -15311, -14607, -13903, -13199, 91 | -12489, -11721, -10953, -10185, -9417, -8649, -7881, -7113, 92 | -6223, -5439, -4655, -3871, -3087, -2303, -1519, -735, 93 | } 94 | 95 | func TestSin8(t *testing.T) { 96 | for x := 0; x < 256; x++ { 97 | if fastmath.Sin8(uint8(x)) != fastLEDSin8[x] { 98 | t.Errorf("sin8(%d) expected: %d, found: %d", x, fastLEDSin8[x], fastmath.Sin8(uint8(x))) 99 | } 100 | } 101 | } 102 | 103 | func TestSin8Delta(t *testing.T) { 104 | var totalDelta float64 = 0 105 | var maxDelta float64 = 0 106 | for x := 0; x <= 256; x++ { 107 | sin8 := fastmath.Sin8(uint8(x)) 108 | xInRadians := float64(x) * 2 * math.Pi / 256 109 | stdSin8 := (math.Sin(xInRadians) + 1) / 2 * 256 110 | if sin8 == 0 || math.Round(stdSin8) == 0 { 111 | // Avoid division with zero. 112 | continue 113 | } 114 | delta := float64(sin8) - stdSin8 115 | if delta > maxDelta { 116 | maxDelta = delta 117 | } 118 | totalDelta += math.Abs(delta) 119 | 120 | // t.Logf("sin8(%d): %d, sin float: %.02f, delta: %.02f", x, sin8, stdSin8, delta) 121 | } 122 | t.Logf("max delta: %.02f, average delta: %.02f", maxDelta, totalDelta/256) 123 | averageError := totalDelta / 256 / 256 * 100 124 | t.Logf("max error: %.02f%%, average error: %.02f%%", maxDelta/256*100, averageError) 125 | if averageError > 0.8 { 126 | // TODO: Can we lower this further? 
127 | t.Fatal("average error is too high") 128 | } 129 | } 130 | 131 | func TestSin16(t *testing.T) { 132 | for x := 0; x < 65535; x += 256 { 133 | if fastmath.Sin16(uint16(x)) != fastLEDSin16[x/256] { 134 | t.Errorf("sin16(%d) expected: %d, found: %d", x, fastLEDSin16[x/256], fastmath.Sin16(uint16(x))) 135 | } 136 | } 137 | } 138 | 139 | func TestSin16Delta(t *testing.T) { 140 | var totalDelta float64 = 0 141 | var maxDelta float64 = 0 142 | for x := 0; x <= 65535; x += 256 { 143 | sin16 := fastmath.Sin16(uint16(x)) 144 | xInRadians := float64(x) * 2 * math.Pi / 65535 145 | stdSin16 := math.Sin(xInRadians) / 2 * 65535 146 | if sin16 == 0 || math.Round(stdSin16) == 0 { 147 | // Avoid division with zero. 148 | continue 149 | } 150 | delta := float64(sin16) - stdSin16 151 | if math.Abs(delta) > maxDelta { 152 | maxDelta = delta 153 | } 154 | totalDelta += math.Abs(delta) 155 | 156 | // t.Logf("sin16(%d): %d, sin float: %.02f, delta: %.02f", x, sin16, stdSin16, delta) 157 | } 158 | t.Logf("max delta: %.02f, average delta: %.02f", maxDelta, totalDelta/256) 159 | averageError := totalDelta / 256 / 65536 * 100 160 | t.Logf("max error: %.02f%%, average error: %.02f%%", maxDelta/65536*100, averageError) 161 | if averageError > 0.2 { 162 | // TODO: Can we lower this further? 
163 | t.Fatal("average error is too high") 164 | } 165 | } 166 | 167 | func stdLibSin8(theta uint8) uint8 { 168 | // Find the same 0-255 range as Sin8() 169 | xInRadians := float64(theta) / 255 * 2 * math.Pi 170 | return uint8(math.Round((math.Sin(xInRadians) + 1) / 2 * 255)) 171 | } 172 | 173 | func BenchmarkStdLibSin8(b *testing.B) { 174 | var r uint8 175 | x := fastmath.PI8 176 | for n := 0; n < b.N; n++ { 177 | r = stdLibSin8(x) 178 | } 179 | result8 = r 180 | } 181 | 182 | func BenchmarkSin8(b *testing.B) { 183 | var r uint8 184 | x := fastmath.PI8 185 | for n := 0; n < b.N; n++ { 186 | r = fastmath.Sin8(x) 187 | } 188 | result8 = r 189 | } 190 | 191 | func stdLibSin16(theta uint16) int16 { 192 | // Find the same -32767 to 32767 range as Sin16() 193 | xInRadians := float64(theta) / 65535 * 2 * math.Pi 194 | return int16(math.Round((math.Sin(xInRadians)) / 2 * 65535)) 195 | } 196 | 197 | func BenchmarkStdLibSin16(b *testing.B) { 198 | var r int16 199 | x := fastmath.PI16 200 | for n := 0; n < b.N; n++ { 201 | r = stdLibSin16(x) 202 | } 203 | result16 = r 204 | } 205 | 206 | func BenchmarkSin16(b *testing.B) { 207 | var r int16 208 | x := fastmath.PI16 209 | for n := 0; n < b.N; n++ { 210 | r = fastmath.Sin16(x) 211 | } 212 | result16 = r 213 | } 214 | --------------------------------------------------------------------------------