├── .gitignore ├── nimsimd.nimble ├── examples ├── simple.nim └── simplex_noise.nim ├── LICENSE ├── README.md └── src ├── nimsimd.nim └── nimsimd ├── mmx.nim ├── sse.nim ├── avx.nim ├── avx2.nim └── sse2.nim /.gitignore: -------------------------------------------------------------------------------- 1 | *.exe 2 | nimcache/ 3 | bin/ -------------------------------------------------------------------------------- /nimsimd.nimble: -------------------------------------------------------------------------------- 1 | # Package 2 | 3 | version = "0.1.0" 4 | author = "Jack Mott" 5 | description = "simd library" 6 | license = "MIT" 7 | srcDir = "src" 8 | 9 | 10 | # Dependencies 11 | 12 | -------------------------------------------------------------------------------- /examples/simple.nim: -------------------------------------------------------------------------------- 1 | import ../src/nimsimd 2 | import ../src/nimsimd/sse2 3 | import ../src/nimsimd/avx2 4 | 5 | var 6 | a = newSeq[float32](16) 7 | b = newSeq[float32](16) 8 | r = newSeq[float32](16) 9 | 10 | for i,v in a: 11 | a[i] = float32(i) 12 | b[i] = float32(i*2) 13 | 14 | SIMD: 15 | echo "SIMD lane width in bytes:" & $simd.width 16 | for i in countup(0,pred(a.len),simd.width div 4): 17 | var av = simd.loadu_ps(addr a[i]) 18 | var bv = simd.loadu_ps(addr b[i]) 19 | var rv = simd.add_ps(av,bv) 20 | simd.storeu_ps(addr r[i],rv) 21 | 22 | echo "A + B = R" 23 | echo a 24 | echo b 25 | echo r 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jack Mott 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nim_simd 2 | 3 | This is an experiment / proof of concept to use Nim's metaprogramming to provide an easy-to-use SIMD abstraction layer. 4 | The goal is for users of the library to be able to write blocks of code containing SIMD intrinsics *one time* and end up with optimum 5 | or near-optimum SIMD instructions being used at runtime according to the user's hardware. If you are interested in this, you may also be interested in my Rust library, which is a more complete version of the same idea: [SIMDeez](https://github.com/jackmott/simdeez) 6 | 7 | How this will work: 8 | 9 | * The user will write a block of code containing SIMD intrinsics using the `SIMD:` macro 10 | * At *compile time* the macro will produce all desired SIMD versions of the block: 
sse, sse2, avx, and so on 11 | * At *run time* the SIMD instructions available on the computer will be detected, and the appropriate version of the code block will be executed 12 | 13 | # Progress So Far: 14 | 15 | See `src/nimsimd.nim` for the proof of concept so far. You can already write code like this: 16 | 17 | ```nim 18 | var 19 | a = newSeq[float32](12) 20 | b = newSeq[float32](12) 21 | r = newSeq[float32](12) 22 | 23 | for i,v in a: 24 | a[i] = float32(i) 25 | b[i] = 2.0'f32 26 | 27 | 28 | SIMD: 29 | for i in countup(0,