├── .codedocs ├── samples ├── CMakeLists.txt ├── greater │ ├── CMakeLists.txt │ └── greater.cpp ├── nway_tree │ ├── CMakeLists.txt │ ├── do_nothing.cpp │ └── nway_tree.cpp ├── to_lower │ ├── CMakeLists.txt │ └── do_nothing.cpp ├── bubble_sort │ └── CMakeLists.txt ├── binary_search │ ├── CMakeLists.txt │ └── do_nothing.cpp └── boyer_moore_horspool │ ├── CMakeLists.txt │ └── do_nothing.cpp ├── .gitignore ├── CMakeLists.txt ├── doc └── CMakeLists.txt ├── LICENSE ├── appveyor.yml ├── test ├── main.cpp ├── CMakeLists.txt ├── shuffle.cpp ├── bitwise.cpp ├── intravector.cpp └── arithmetic.cpp ├── include └── litesimd │ ├── detail │ ├── arch │ │ ├── intravector.h │ │ ├── bitwise.h │ │ ├── compare.h │ │ ├── shuffle.h │ │ ├── traits.h │ │ ├── arithmetic.h │ │ ├── avx │ │ │ ├── tag.h │ │ │ ├── detail │ │ │ │ └── compatibility.h │ │ │ ├── algorithm.h │ │ │ ├── bitwise.h │ │ │ ├── arithmetic.h │ │ │ ├── intravector.h │ │ │ ├── traits.h │ │ │ └── compare.h │ │ ├── sse │ │ │ ├── tag.h │ │ │ ├── algorithm.h │ │ │ ├── bitwise.h │ │ │ ├── arithmetic.h │ │ │ ├── intravector.h │ │ │ ├── traits.h │ │ │ ├── compare.h │ │ │ └── shuffle.h │ │ ├── common │ │ │ ├── traits.h │ │ │ ├── intravector.h │ │ │ ├── arch.h │ │ │ ├── shuffle.h │ │ │ ├── algorithm.h │ │ │ ├── bitwise.h │ │ │ ├── compare.h │ │ │ └── arithmetic.h │ │ └── tag.h │ └── helper_macros.h │ ├── algorithm.h │ ├── helpers │ ├── containers.h │ └── iostream.h │ ├── algorithm │ ├── detail │ │ └── for_each_loop.h │ ├── iota.h │ └── minmax.h │ ├── intravector.h │ ├── arithmetic.h │ ├── types.h │ └── shuffle.h ├── .travis.yml └── README.md /.codedocs: -------------------------------------------------------------------------------- 1 | DOXYFILE = doc/litesimd.doxy 2 | INPUT = include 3 | -------------------------------------------------------------------------------- /samples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | # Boost is requirement for samples 3 | find_package(Boost 
1.56.0 COMPONENTS timer chrono system) 4 | 5 | if(Boost_FOUND) 6 | add_subdirectory(binary_search) 7 | add_subdirectory(boyer_moore_horspool) 8 | add_subdirectory(bubble_sort) 9 | add_subdirectory(greater) 10 | add_subdirectory(nway_tree) 11 | add_subdirectory(to_lower) 12 | endif() 13 | -------------------------------------------------------------------------------- /samples/greater/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(greater) 2 | aux_source_directory(. SRC_LIST) 3 | add_executable(${PROJECT_NAME} 4 | ${SRC_LIST} 5 | ) 6 | 7 | target_include_directories(${PROJECT_NAME} 8 | SYSTEM PRIVATE # PRIVATE: an executable has no consumers to propagate include paths to 9 | ${Boost_INCLUDE_DIRS} 10 | ) 11 | 12 | target_link_libraries(${PROJECT_NAME} PRIVATE 13 | litesimd 14 | ${Boost_LIBRARIES} 15 | ) 16 | 17 | set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 11) 18 | -------------------------------------------------------------------------------- /samples/nway_tree/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(nway_tree) 2 | aux_source_directory(. SRC_LIST) 3 | add_executable(${PROJECT_NAME} 4 | ${SRC_LIST} 5 | ) 6 | 7 | target_include_directories(${PROJECT_NAME} 8 | SYSTEM PRIVATE # PRIVATE: an executable has no consumers to propagate include paths to 9 | ${Boost_INCLUDE_DIRS} 10 | ) 11 | 12 | target_link_libraries(${PROJECT_NAME} PRIVATE 13 | litesimd 14 | ${Boost_LIBRARIES} 15 | ) 16 | 17 | set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 11) 18 | -------------------------------------------------------------------------------- /samples/to_lower/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(to_lower) 2 | aux_source_directory(.
SRC_LIST) 3 | add_executable(${PROJECT_NAME} 4 | ${SRC_LIST} 5 | ) 6 | 7 | target_include_directories(${PROJECT_NAME} 8 | SYSTEM PRIVATE # PRIVATE: an executable has no consumers to propagate include paths to 9 | ${Boost_INCLUDE_DIRS} 10 | ) 11 | 12 | target_link_libraries(${PROJECT_NAME} PRIVATE 13 | litesimd 14 | ${Boost_LIBRARIES} 15 | ) 16 | 17 | set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 11) 18 | -------------------------------------------------------------------------------- /samples/bubble_sort/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(bubble_sort) 2 | aux_source_directory(. SRC_LIST) 3 | add_executable(${PROJECT_NAME} 4 | ${SRC_LIST} 5 | ) 6 | 7 | target_include_directories(${PROJECT_NAME} 8 | SYSTEM PRIVATE # PRIVATE: an executable has no consumers to propagate include paths to 9 | ${Boost_INCLUDE_DIRS} 10 | ) 11 | 12 | target_link_libraries(${PROJECT_NAME} PRIVATE 13 | litesimd 14 | ${Boost_LIBRARIES} 15 | ) 16 | 17 | set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 11) 18 | -------------------------------------------------------------------------------- /samples/binary_search/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(binary_search) 2 | aux_source_directory(. SRC_LIST) 3 | add_executable(${PROJECT_NAME} 4 | ${SRC_LIST} 5 | ) 6 | 7 | target_include_directories(${PROJECT_NAME} 8 | SYSTEM PRIVATE # PRIVATE: an executable has no consumers to propagate include paths to 9 | ${Boost_INCLUDE_DIRS} 10 | ) 11 | 12 | target_link_libraries(${PROJECT_NAME} PRIVATE 13 | litesimd 14 | ${Boost_LIBRARIES} 15 | ) 16 | 17 | set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 11) 18 | -------------------------------------------------------------------------------- /samples/boyer_moore_horspool/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(boyer_moore_horspool) 2 | aux_source_directory(.
SRC_LIST) 3 | add_executable(${PROJECT_NAME} 4 | ${SRC_LIST} 5 | ) 6 | 7 | target_include_directories(${PROJECT_NAME} 8 | SYSTEM PRIVATE # PRIVATE: an executable has no consumers to propagate include paths to 9 | ${Boost_INCLUDE_DIRS} 10 | ) 11 | 12 | target_link_libraries(${PROJECT_NAME} PRIVATE 13 | litesimd 14 | ${Boost_LIBRARIES} 15 | ) 16 | 17 | set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 11) 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | 34 | # vim 35 | *.swp 36 | 37 | # Directories 38 | build/ 39 | compile_commands.json 40 | ID 41 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.5) # must precede project() so policy defaults are set before the project is initialised 2 | project(litesimd) 3 | 4 | if(MSVC) 5 | add_compile_options( 6 | "/W4" 7 | "$<$<CONFIG:Release>:/O2>" # speed optimisation, Release configuration only 8 | "$<$<CONFIG:Release>:/Oi>" # compiler intrinsics, Release configuration only 9 | "/MD$<$<CONFIG:Debug>:d>" # /MDd in Debug, /MD in every other configuration 10 | ) 11 | else() 12 | add_compile_options( 13 | "-Wall" 14 | "-march=native" 15 | "-mtune=native" 16 | "-fno-strict-aliasing" 17 | ) 18 | endif() 19 | 20 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 21 | 22 | enable_testing() 23 | 24 | add_library(${PROJECT_NAME} INTERFACE) # header-only library: carries usage requirements only 25 | target_compile_definitions(${PROJECT_NAME} INTERFACE LIBRARY_HEADER_ONLY) 26 | target_include_directories(${PROJECT_NAME} INTERFACE include) 27 | install(DIRECTORY include/ DESTINATION include) 28 | 29 | add_subdirectory(doc) 30 | add_subdirectory(samples) 31 | add_subdirectory(test) 32 |
-------------------------------------------------------------------------------- /doc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # add a target to generate API documentation with Doxygen 2 | find_package(Doxygen) 3 | option(BUILD_DOCUMENTATION "Create and install the HTML based API documentation (requires Doxygen)" ${DOXYGEN_FOUND}) 4 | 5 | if(BUILD_DOCUMENTATION) 6 | if(NOT DOXYGEN_FOUND) 7 | message(FATAL_ERROR "Doxygen is needed to build the documentation.") 8 | endif() 9 | 10 | set(doxyfile_in ${CMAKE_CURRENT_SOURCE_DIR}/litesimd.doxy) 11 | set(doxyfile ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile) 12 | 13 | configure_file(${doxyfile_in} ${doxyfile} @ONLY) 14 | 15 | add_custom_target(doc ALL 16 | COMMAND ${DOXYGEN_EXECUTABLE} ${doxyfile} 17 | WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} 18 | COMMENT "Generating litesimd documentation with Doxygen" 19 | VERBATIM) 20 | 21 | install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/litesimd DESTINATION share/doc) 22 | endif() 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 André Tupinambá 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /samples/nway_tree/do_nothing.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #include 24 | 25 | void do_nothing( int32_t ) 26 | { 27 | } 28 | -------------------------------------------------------------------------------- /samples/binary_search/do_nothing.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #include 24 | 25 | void do_nothing( int32_t ) 26 | { 27 | } 28 | -------------------------------------------------------------------------------- /samples/boyer_moore_horspool/do_nothing.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #include 24 | 25 | void do_nothing( size_t ) 26 | { 27 | } 28 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | version: 0.1.0.{build} 2 | skip_tags: true 3 | image: 4 | - Visual Studio 2017 5 | - Visual Studio 2015 6 | platform: x64 7 | configuration: Release 8 | clone_folder: c:\projects\litesimd 9 | 10 | environment: 11 | matrix: 12 | - arch: Win64 13 | matrix: 14 | fast_finish: true 15 | 16 | # skip unsupported combinations 17 | init: 18 | - set arch= 19 | - if "%arch%"=="Win64" ( set arch= Win64) 20 | - echo %arch% 21 | - echo %APPVEYOR_BUILD_WORKER_IMAGE% 22 | - if "%APPVEYOR_BUILD_WORKER_IMAGE%"=="Visual Studio 2017" ( set generator="Visual Studio 15 2017" ) 23 | - if "%APPVEYOR_BUILD_WORKER_IMAGE%"=="Visual Studio 2015" ( set generator="Visual Studio 14 2015" ) 24 | - echo %generator% 25 | 26 | before_build: 27 | - cmd: |- 28 | mkdir build 29 | cd build 30 | cmake --version 31 | cmake -G %generator% -DCMAKE_CXX_FLAGS="/EHsc /D BOOST_ALL_DYN_LINK /D WIN32_LEAN_AND_MEAN" -DBOOST_ROOT=C:\Libraries\boost_1_65_1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_GENERATOR_PLATFORM=x64 .. 
32 | build: 33 | project: c:\projects\litesimd\build\litesimd.sln 34 | verbosity: normal 35 | parallel: true 36 | only_commits: 37 | files: 38 | - CMakeLists.txt 39 | - appveyor.yml 40 | - include/ 41 | - samples/ 42 | - test/ 43 | -------------------------------------------------------------------------------- /samples/to_lower/do_nothing.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #include 24 | #include 25 | 26 | void do_nothing( const litesimd::string& ) {} 27 | -------------------------------------------------------------------------------- /test/main.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #include "gtest/gtest.h" 24 | 25 | int main(int argc, char **argv) 26 | { 27 | ::testing::InitGoogleTest(&argc, argv); 28 | int ret = RUN_ALL_TESTS(); 29 | return ret; 30 | } 31 | 32 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/intravector.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_ARCH_INTRAVECTOR_H 24 | #define LITESIMD_ARCH_INTRAVECTOR_H 25 | 26 | #include 27 | #include 28 | 29 | #endif // LITESIMD_ARCH_INTRAVECTOR_H 30 | 31 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/bitwise.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_ARCH_BITWISE_H 24 | #define LITESIMD_ARCH_BITWISE_H 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | #endif // LITESIMD_ARCH_BITWISE_H 31 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/compare.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_ARCH_COMPARE_H 24 | #define LITESIMD_ARCH_COMPARE_H 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | #endif // LITESIMD_ARCH_COMPARE_H 31 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/shuffle.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_ARCH_SHUFFLE_H 24 | #define LITESIMD_ARCH_SHUFFLE_H 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | #endif // LITESIMD_ARCH_SHUFFLE_H 31 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/traits.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_ARCH_TRAITS_H 24 | #define LITESIMD_ARCH_TRAITS_H 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | #endif // LITESIMD_ARCH_TRAITS_H 31 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/arithmetic.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_ARCH_ARITHMETIC_H 24 | #define LITESIMD_ARCH_ARITHMETIC_H 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | #endif // LITESIMD_ARCH_ARITHMETIC_H 31 | 32 | 33 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/avx/tag.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_AVX_TAG_H 24 | #define LITESIMD_AVX_TAG_H 25 | 26 | #include 27 | 28 | #ifdef LITESIMD_HAS_AVX 29 | 30 | namespace litesimd { 31 | 32 | struct avx_tag {}; 33 | 34 | } // namespace litesimd 35 | 36 | #endif //LITESIMD_HAS_AVX 37 | 38 | #endif //LITESIMD_AVX_TAG_H 39 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/sse/tag.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_SSE_TAG_H 24 | #define LITESIMD_SSE_TAG_H 25 | 26 | #include 27 | 28 | #ifdef LITESIMD_HAS_SSE 29 | 30 | namespace litesimd { 31 | 32 | struct sse_tag {}; 33 | 34 | } // namespace litesimd 35 | 36 | 37 | #endif //LITESIMD_HAS_SSE 38 | 39 | #endif //LITESIMD_SSE_TAG_H 40 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/common/traits.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_COMMON_TRAITS_H 24 | #define LITESIMD_COMMON_TRAITS_H 25 | 26 | #include 27 | 28 | namespace litesimd { 29 | 30 | template< typename ValueType_T, typename Tag_T = default_tag > 31 | struct traits{}; 32 | 33 | } // namespace litesimd 34 | 35 | #endif //LITESIMD_COMMON_TRAITS_H 36 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/tag.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_ARCH_TAG_H 24 | #define LITESIMD_ARCH_TAG_H 25 | 26 | #include 27 | #include 28 | 29 | namespace litesimd { 30 | 31 | #if defined(LITESIMD_HAS_AVX) 32 | 33 | using default_tag = avx_tag; 34 | 35 | #elif defined(LITESIMD_HAS_SSE) 36 | 37 | using default_tag = sse_tag; 38 | 39 | #endif //LITESIMD_HAS_SSE 40 | 41 | 42 | } // namespace litesimd 43 | 44 | #endif // LITESIMD_ARCH_TAG_H 45 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/avx/detail/compatibility.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_AVX_DETAIL_COMPATIBILITY_H 24 | #define LITESIMD_AVX_DETAIL_COMPATIBILITY_H 25 | 26 | #ifdef LITESIMD_HAS_AVX 27 | 28 | #if defined(__GNUC__) && (__GNUC__ < 7) && !defined(__clang__) 29 | // old GCC workarounds 30 | static inline float _mm256_cvtss_f32( __m256 vec ) { return _mm_cvtss_f32( _mm256_extractf128_ps( vec, 0 ) ); } 31 | static inline double _mm256_cvtsd_f64( __m256d vec ) { return _mm_cvtsd_f64( _mm256_extractf128_pd( vec, 0 ) ); } 32 | #endif 33 | 34 | #endif // LITESIMD_HAS_AVX 35 | #endif // LITESIMD_AVX_DETAIL_COMPATIBILITY_H 36 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/common/intravector.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_ARCH_COMMON_INTRAVECTOR_H 24 | #define LITESIMD_ARCH_COMMON_INTRAVECTOR_H 25 | 26 | #include 27 | 28 | namespace litesimd { 29 | 30 | // Generic intravector operation 31 | // --------------------------------------------------------------------------------------- 32 | template< typename ValueType_T, typename Tag_T > 33 | struct intravector_op 34 | { 35 | template< typename Function_T > 36 | inline simd_type< ValueType_T, Tag_T > 37 | operator()( simd_type< ValueType_T, Tag_T >, Function_T ){} 38 | }; 39 | 40 | } // namespace litesimd 41 | 42 | #endif // LITESIMD_ARCH_COMMON_INTRAVECTOR_H 43 | -------------------------------------------------------------------------------- /include/litesimd/algorithm.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_ALGORITHM_H 24 | #define LITESIMD_ALGORITHM_H 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | /** 34 | * \defgroup algorithm Algorithm functions 35 | * 36 | * Algorithm defines a collection of functions especially designed to be used 37 | * on each value inside the packed SIMD register. 38 | * 39 | * All these functions are accessible at `<litesimd/algorithm.h>` 40 | */ 41 | 42 | #endif // LITESIMD_ALGORITHM_H 43 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/common/arch.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_ARCH_COMMON_ARCH_H 24 | #define LITESIMD_ARCH_COMMON_ARCH_H 25 | 26 | // Check windows 27 | #if defined(_WIN32) 28 | #define LITESIMD_HAS_SSE 29 | 30 | #if defined(__AVX2__) 31 | #define LITESIMD_HAS_AVX 32 | #endif //__AVX2__ 33 | 34 | #endif //_WIN64 35 | 36 | // Check GCC 37 | #if defined(__GNUC__) 38 | #if defined(__x86_64__) 39 | #define LITESIMD_HAS_SSE 40 | 41 | #if defined(__AVX2__) 42 | #define LITESIMD_HAS_AVX 43 | #endif //__AVX2__ 44 | 45 | #endif //__x86_64__ 46 | 47 | #if defined(__ARM_ARCH_7__) 48 | #define LITESIMD_HAS_NEON 49 | #endif //__ARM_ARCH_7__ 50 | 51 | #endif //__GNUC__ 52 | 53 | 54 | #endif // LITESIMD_ARCH_COMMON_ARCH_H 55 | -------------------------------------------------------------------------------- /include/litesimd/helpers/containers.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above 
copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_HELPERS_CONTAINERS_H 24 | #define LITESIMD_HELPERS_CONTAINERS_H 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | namespace litesimd { 31 | 32 | /** 33 | * \defgroup helpers Helper classes and compatibility 34 | * 35 | * Helpers defines a group of type substitution and `std` compatibility functions 36 | * and classes. 37 | * 38 | * Each file must be included separately from the path `` 39 | */ 40 | 41 | /** 42 | * \ingroup helpers 43 | * \brief Aligned `std::vector` 44 | * 45 | * This type is a drop-in substitute to `std::vector` with aligned allocator. 46 | * 47 | * Depends on `boost::align`. 48 | */ 49 | template< typename Val_T > 50 | using vector = std::vector< Val_T, boost::alignment::aligned_allocator >; 51 | 52 | /** 53 | * \ingroup helpers 54 | * \brief Aligned `std::string` 55 | * 56 | * This type is a drop-in substitute to `std::string` with aligned allocator. 57 | * 58 | * Depends on `boost::align`. 
59 | */ 60 | using string = std::basic_string< char, 61 | std::char_traits, 62 | boost::alignment::aligned_allocator >; 63 | 64 | } // namespace litesimd 65 | 66 | #endif // LITESIMD_HELPERS_CONTAINERS_H 67 | -------------------------------------------------------------------------------- /include/litesimd/algorithm/detail/for_each_loop.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
#ifndef LITESIMD_ALGORITHM_DETAIL_FOREACHLOOP_H
#define LITESIMD_ALGORITHM_DETAIL_FOREACHLOOP_H

namespace litesimd {
namespace detail {

/// Compile-time unrolled visit of lanes `index` .. 0 of a SIMD value.
///
/// `func( lane_index, lane_value )` is called once per lane and must return
/// something convertible to `bool`; returning `false` stops the traversal.
///  - `forward == true`  : lanes are visited in ascending order 0, 1, ..., index.
///  - `forward == false` : lanes are visited in descending order index, ..., 0.
/// The call operator returns `true` only when every invoked `func` returned
/// `true`.
template< int index, typename SimdType_T, typename Function_T >
struct for_each_loop
{
    bool operator()( bool forward, SimdType_T vec, Function_T func )
    {
        using type = typename SimdType_T::simd_value_type;
        using tag = typename SimdType_T::simd_tag;
        if( forward )
        {
            // Ascending: finish all lower lanes first, then handle this lane.
            if( for_each_loop< index-1, SimdType_T, Function_T >()( true, vec, func ) )
            {
                return func( index, get< index, type, tag >( vec ) );
            }
        }
        else
        {
            // Descending: handle this lane first, then recurse to lower lanes.
            if( func( index, get< index, type, tag >( vec ) ) )
            {
                return for_each_loop< index-1, SimdType_T, Function_T >()( false, vec, func );
            }
        }
        return false;
    }
};

/// Recursion terminator: lane 0 is visited the same way in both directions.
template< typename SimdType_T, typename Function_T >
struct for_each_loop< 0, SimdType_T, Function_T >
{
    bool operator()( bool, SimdType_T vec, Function_T func )
    {
        using type = typename SimdType_T::simd_value_type;
        using tag = typename SimdType_T::simd_tag;

        return func( 0, get< 0, type, tag >( vec ) );
    }
};

}} // namespace litesimd::detail

#endif // LITESIMD_ALGORITHM_DETAIL_FOREACHLOOP_H

--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
project(litesimd_test)

find_package(Threads REQUIRED)

# Enable ExternalProject CMake module
include(ExternalProject)

# Build GoogleTest with the same compilers as this project.
list(APPEND GTEST_CMAKE_ARGS "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}")
list(APPEND GTEST_CMAKE_ARGS "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}")
if(MSVC)
    list(APPEND GTEST_CMAKE_ARGS "-DBUILD_SHARED_LIBS=ON")
endif()

# Path of the gtest library inside the external build tree. It is used both as
# the declared build byproduct and as the imported location, so the two can
# never drift apart.
# NOTE(review): the MSVC path hard-codes the Release configuration - confirm
# that other configurations are not expected to work.
if(MSVC)
    set(LITESIMD_GTEST_LIBRARY "googlemock/gtest/Release/gtest.lib")
else()
    set(LITESIMD_GTEST_LIBRARY "googlemock/gtest/libgtest.a")
endif()

# Download and build GoogleTest
ExternalProject_Add(
    gtest
    URL https://github.com/google/googletest/archive/release-1.8.1.zip
    PREFIX ${CMAKE_CURRENT_BINARY_DIR}/gtest
    # Disable install step
    INSTALL_COMMAND ""
    CMAKE_ARGS ${GTEST_CMAKE_ARGS}
    # Tell the generator which file the build step produces; without this,
    # Ninja has no rule for the imported library and refuses to build.
    BUILD_BYPRODUCTS "<BINARY_DIR>/${LITESIMD_GTEST_LIBRARY}"
)

# Get GTest source and binary directories from CMake project
ExternalProject_Get_Property(gtest source_dir binary_dir)

# Create a libgtest target to be used as a dependency by test programs
add_library(libgtest IMPORTED STATIC GLOBAL)
add_dependencies(libgtest gtest)

# Set libgtest properties.
# INTERFACE_LINK_LIBRARIES replaces the deprecated
# IMPORTED_LINK_INTERFACE_LIBRARIES property (see policy CMP0022).
set_target_properties(libgtest PROPERTIES
    IMPORTED_LOCATION "${binary_dir}/${LITESIMD_GTEST_LIBRARY}"
    INTERFACE_LINK_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}"
)

cmake_policy(SET CMP0054 NEW)
if(CMAKE_VERSION VERSION_LESS "3.10")
    cmake_policy(SET CMP0057 NEW)
endif()

# Before CMake 3.9 the test-registration helper comes from FindGTest;
# afterwards it lives in the GoogleTest module.
if(CMAKE_VERSION VERSION_LESS "3.9")
    find_package(GTest)
else()
    include(GoogleTest)
endif()

# Test project
aux_source_directory(. SRC_LIST)
add_executable(${PROJECT_NAME}
    ${SRC_LIST}
)

# Nothing links against the test executable, so its include paths are PRIVATE.
target_include_directories(${PROJECT_NAME}
    SYSTEM PRIVATE
        ${Boost_INCLUDE_DIRS}
        "${source_dir}/googletest/include"
)

target_link_libraries(${PROJECT_NAME} PRIVATE
    litesimd
    ${Boost_LIBRARIES}
    libgtest
)

set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 11)

# Register the tests with CTest using the best mechanism this CMake offers.
if(CMAKE_VERSION VERSION_LESS "3.9")
    gtest_add_tests(${PROJECT_NAME} "" AUTO)
elseif(CMAKE_VERSION VERSION_LESS "3.10")
    gtest_add_tests(TARGET ${PROJECT_NAME})
else()
    gtest_discover_tests(${PROJECT_NAME})
endif()

--------------------------------------------------------------------------------
/include/litesimd/algorithm/iota.h:
--------------------------------------------------------------------------------
// MIT License
//
// Copyright (c) 2018 André Tupinambá
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

#ifndef LITESIMD_ALGORITHM_IOTA_H
#define LITESIMD_ALGORITHM_IOTA_H

#include

namespace litesimd {

/**
 * \ingroup algorithm
 * \brief Store increasing sequence on a SIMD register
 *
 * Assigns to every element in SIMD register successive values of _val_, as if incremented with `++val` after each element is written.
 *
 * \param val Start value (optional, default 0)
 * \returns SIMD register with increasing values
 *
 * **Example**
 * ```{.cpp}
 * #include
 * #include
 * #include
 * #include
 *
 * int main()
 * {
 *     namespace ls = litesimd;
 *
 *     std::cout << "iota< int32_t >(): " << ls::iota< int32_t >() << std::endl;
 *     std::cout << "iota< int16_t >( 4 ): " << ls::iota< int16_t >( 4 ) << std::endl;
 *     std::cout << "iota< double >( 1.2 ): " << ls::iota< double >( 1.2 ) << std::endl;
 *     return 0;
 * }
 * ```
 * Output on a SSE compilation
 * ```
 * iota< int32_t >(): (3, 2, 1, 0)
 * iota< int16_t >( 4 ): (11, 10, 9, 8, 7, 6, 5, 4)
 * iota< double >( 1.2 ): (2.2, 1.2)
 * ```
 */
template< typename ValueType_T, typename Tag_T = default_tag >
inline simd_type< ValueType_T, Tag_T >
iota( ValueType_T val )
{
    // Offset the base sequence provided by simd_type::iota() by the start value.
    return add( val, simd_type< ValueType_T, Tag_T >::iota() );
}

/**
 * \ingroup algorithm
 * \brief Overload without a start value: returns `simd_type::iota()` unchanged.
 */
template< typename ValueType_T, typename Tag_T = default_tag >
inline simd_type< ValueType_T, Tag_T >
iota()
{
    return simd_type< ValueType_T, Tag_T >::iota();
}

} // namespace litesimd

#endif // LITESIMD_ALGORITHM_IOTA_H

-------------------------------------------------------------------------------- /include/litesimd/helpers/iostream.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_HELPERS_IOSTREAM_H 24 | #define LITESIMD_HELPERS_IOSTREAM_H 25 | 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | namespace litesimd { 34 | 35 | /** 36 | * \ingroup helpers 37 | * \brief Compatibility version to `operator<<` to output a `simd_type` on `ostream`. 
38 | * 39 | * \param out Output stream 40 | * \param vec simd_type to print to stream 41 | * \return The output stream 42 | */ 43 | template< typename SimdType_T, 44 | typename std::enable_if::value>::type* = nullptr > 45 | inline std::ostream& operator<<( std::ostream& out, SimdType_T vec ) 46 | { 47 | std::ios_base::fmtflags f( out.flags() ); 48 | 49 | out << "("; 50 | for_each_backward( vec, [&out]( int index, typename SimdType_T::simd_value_type val ) -> bool 51 | { 52 | constexpr size_t mask = (1 << (2*sizeof(typename SimdType_T::simd_value_type)))-1; 53 | out << (+val & mask); 54 | if( index > 0 ) 55 | out << ", "; 56 | return true; 57 | } ); 58 | out << ")"; 59 | 60 | out.flags( f ); 61 | 62 | return out; 63 | } 64 | 65 | template< typename SimdType_T, 66 | typename std::enable_if::value>::type* = nullptr > 67 | inline std::ostream& operator<<( std::ostream& out, SimdType_T vec ) 68 | { 69 | std::ios_base::fmtflags f( out.flags() ); 70 | 71 | out << "("; 72 | for_each_backward( vec, [&out]( int index, typename SimdType_T::simd_value_type val ) -> bool 73 | { 74 | out << val; 75 | if( index > 0 ) 76 | out << ", "; 77 | return true; 78 | } ); 79 | out << ")"; 80 | 81 | out.flags( f ); 82 | 83 | return out; 84 | } 85 | 86 | } // namespace litesimd 87 | 88 | #endif // LITESIMD_HELPERS_IOSTREAM_H 89 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/sse/algorithm.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to 
whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_ARCH_SSE_ALGORITHM_H 24 | #define LITESIMD_ARCH_SSE_ALGORITHM_H 25 | 26 | #ifdef LITESIMD_HAS_SSE 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | namespace litesimd { 33 | 34 | // Min max 35 | // --------------------------------------------------------------------------------------- 36 | #define DEF_BINARY_MIN( TYPE_T, CMD ) \ 37 | template<> inline simd_type< TYPE_T, sse_tag > \ 38 | min< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \ 39 | simd_type< TYPE_T, sse_tag > rhs ) { \ 40 | return CMD( lhs, rhs ); } 41 | 42 | DEF_BINARY_MIN( int8_t, _mm_min_epi8 ) 43 | DEF_BINARY_MIN( int16_t, _mm_min_epi16 ) 44 | DEF_BINARY_MIN( int32_t, _mm_min_epi32 ) 45 | DEF_BINARY_MIN( float, _mm_min_ps ) 46 | DEF_BINARY_MIN( double, _mm_min_pd ) 47 | #undef DEF_BINARY_MIN 48 | 49 | #define DEF_BINARY_MAX( TYPE_T, CMD ) \ 50 | template<> inline simd_type< TYPE_T, sse_tag > \ 51 | max< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \ 52 | simd_type< TYPE_T, sse_tag > rhs ) { \ 53 | return CMD( lhs, rhs ); } 54 | 55 | DEF_BINARY_MAX( int8_t, _mm_max_epi8 ) 56 | DEF_BINARY_MAX( int16_t, _mm_max_epi16 ) 57 | DEF_BINARY_MAX( int32_t, _mm_max_epi32 ) 58 | DEF_BINARY_MAX( float, _mm_max_ps ) 
59 | DEF_BINARY_MAX( double, _mm_max_pd ) 60 | #undef DEF_BINARY_MAX 61 | 62 | template<> inline simd_type< int64_t, sse_tag > 63 | min< int64_t, sse_tag >( simd_type< int64_t, sse_tag > lhs, simd_type< int64_t, sse_tag > rhs ) 64 | { 65 | auto mask = greater< int64_t, sse_tag >( lhs, rhs ); 66 | return blend< int64_t, sse_tag >( mask, rhs, lhs ); 67 | } 68 | 69 | template<> inline simd_type< int64_t, sse_tag > 70 | max< int64_t, sse_tag >( simd_type< int64_t, sse_tag > lhs, simd_type< int64_t, sse_tag > rhs ) 71 | { 72 | auto mask = greater< int64_t, sse_tag >( lhs, rhs ); 73 | return blend< int64_t, sse_tag >( mask, lhs, rhs ); 74 | } 75 | 76 | } // namespace litesimd 77 | 78 | #endif // LITESIMD_HAS_SSE 79 | #endif // LITESIMD_ARCH_SSE_ALGORITHM_H 80 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/avx/algorithm.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_ARCH_AVX_ALGORITHM_H 24 | #define LITESIMD_ARCH_AVX_ALGORITHM_H 25 | 26 | #ifdef LITESIMD_HAS_AVX 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | namespace litesimd { 33 | 34 | // Min max 35 | // --------------------------------------------------------------------------------------- 36 | #define DEF_BINARY_MIN( TYPE_T, CMD ) \ 37 | template<> inline simd_type< TYPE_T, avx_tag > \ 38 | min< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \ 39 | simd_type< TYPE_T, avx_tag > rhs ) { \ 40 | return CMD( lhs, rhs ); } 41 | 42 | DEF_BINARY_MIN( int8_t, _mm256_min_epi8 ) 43 | DEF_BINARY_MIN( int16_t, _mm256_min_epi16 ) 44 | DEF_BINARY_MIN( int32_t, _mm256_min_epi32 ) 45 | DEF_BINARY_MIN( float, _mm256_min_ps ) 46 | DEF_BINARY_MIN( double, _mm256_min_pd ) 47 | #undef DEF_BINARY_MIN 48 | 49 | #define DEF_BINARY_MAX( TYPE_T, CMD ) \ 50 | template<> inline simd_type< TYPE_T, avx_tag > \ 51 | max< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \ 52 | simd_type< TYPE_T, avx_tag > rhs ) { \ 53 | return CMD( lhs, rhs ); } 54 | 55 | DEF_BINARY_MAX( int8_t, _mm256_max_epi8 ) 56 | DEF_BINARY_MAX( int16_t, _mm256_max_epi16 ) 57 | DEF_BINARY_MAX( int32_t, _mm256_max_epi32 ) 58 | DEF_BINARY_MAX( float, _mm256_max_ps ) 59 | DEF_BINARY_MAX( double, _mm256_max_pd ) 60 | #undef DEF_BINARY_MAX 61 | 62 | template<> inline simd_type< int64_t, avx_tag > 63 | min< int64_t, avx_tag >( simd_type< int64_t, avx_tag > lhs, simd_type< int64_t, avx_tag > rhs ) 64 | { 65 | auto mask = greater< int64_t, avx_tag >( lhs, rhs ); 66 | return blend< int64_t, avx_tag >( mask, rhs, lhs ); 67 | } 68 | 69 | template<> inline simd_type< int64_t, avx_tag > 70 | max< int64_t, avx_tag 
>( simd_type< int64_t, avx_tag > lhs, simd_type< int64_t, avx_tag > rhs ) 71 | { 72 | auto mask = greater< int64_t, avx_tag >( lhs, rhs ); 73 | return blend< int64_t, avx_tag >( mask, lhs, rhs ); 74 | } 75 | 76 | } // namespace litesimd 77 | 78 | #endif // LITESIMD_HAS_AVX 79 | #endif // LITESIMD_ARCH_AVX_ALGORITHM_H 80 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/sse/bitwise.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_SSE_BITWISE_H 24 | #define LITESIMD_SSE_BITWISE_H 25 | 26 | #ifdef LITESIMD_HAS_SSE 27 | 28 | #include 29 | #include 30 | 31 | namespace litesimd { 32 | 33 | // Bit AND 34 | // --------------------------------------------------------------------------------------- 35 | #define DEF_BIT_AND( TYPE_T, CMD ) \ 36 | template<> inline simd_type< TYPE_T, sse_tag > \ 37 | bit_and< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \ 38 | simd_type< TYPE_T, sse_tag > rhs ) { \ 39 | return CMD( lhs, rhs ); } 40 | 41 | DEF_BIT_AND( int8_t, _mm_and_si128 ) 42 | DEF_BIT_AND( int16_t, _mm_and_si128 ) 43 | DEF_BIT_AND( int32_t, _mm_and_si128 ) 44 | DEF_BIT_AND( int64_t, _mm_and_si128 ) 45 | DEF_BIT_AND( float, _mm_and_ps ) 46 | DEF_BIT_AND( double, _mm_and_pd ) 47 | #undef DEF_BIT_AND 48 | 49 | // Bit OR 50 | // --------------------------------------------------------------------------------------- 51 | #define DEF_BIT_OR( TYPE_T, CMD ) \ 52 | template<> inline simd_type< TYPE_T, sse_tag > \ 53 | bit_or< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \ 54 | simd_type< TYPE_T, sse_tag > rhs ) { \ 55 | return CMD( lhs, rhs ); } 56 | 57 | DEF_BIT_OR( int8_t, _mm_or_si128 ) 58 | DEF_BIT_OR( int16_t, _mm_or_si128 ) 59 | DEF_BIT_OR( int32_t, _mm_or_si128 ) 60 | DEF_BIT_OR( int64_t, _mm_or_si128 ) 61 | DEF_BIT_OR( float, _mm_or_ps ) 62 | DEF_BIT_OR( double, _mm_or_pd ) 63 | #undef DEF_BIT_OR 64 | 65 | // Bit XOR 66 | // --------------------------------------------------------------------------------------- 67 | #define DEF_BIT_XOR( TYPE_T, CMD ) \ 68 | template<> inline simd_type< TYPE_T, sse_tag > \ 69 | bit_xor< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \ 70 | simd_type< TYPE_T, sse_tag > rhs ) { \ 71 | return CMD( lhs, rhs ); } 72 | 73 | DEF_BIT_XOR( int8_t, _mm_xor_si128 ) 74 | DEF_BIT_XOR( int16_t, _mm_xor_si128 ) 75 | DEF_BIT_XOR( int32_t, _mm_xor_si128 ) 76 | DEF_BIT_XOR( int64_t, _mm_xor_si128 ) 77 | DEF_BIT_XOR( float, _mm_xor_ps ) 
78 | DEF_BIT_XOR( double, _mm_xor_pd ) 79 | #undef DEF_BIT_XOR 80 | 81 | } // namespace litesimd 82 | 83 | #endif // LITESIMD_HAS_SSE 84 | #endif // LITESIMD_SSE_BITWISE_H 85 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/avx/bitwise.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_AVX_BITWISE_H 24 | #define LITESIMD_AVX_BITWISE_H 25 | 26 | #ifdef LITESIMD_HAS_AVX 27 | 28 | #include 29 | #include 30 | 31 | namespace litesimd { 32 | 33 | // Bit AND 34 | // --------------------------------------------------------------------------------------- 35 | #define DEF_BIT_AND( TYPE_T, CMD ) \ 36 | template<> inline simd_type< TYPE_T, avx_tag > \ 37 | bit_and< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \ 38 | simd_type< TYPE_T, avx_tag > rhs ) { \ 39 | return CMD( lhs, rhs ); } 40 | 41 | DEF_BIT_AND( int8_t, _mm256_and_si256 ) 42 | DEF_BIT_AND( int16_t, _mm256_and_si256 ) 43 | DEF_BIT_AND( int32_t, _mm256_and_si256 ) 44 | DEF_BIT_AND( int64_t, _mm256_and_si256 ) 45 | DEF_BIT_AND( float, _mm256_and_ps ) 46 | DEF_BIT_AND( double, _mm256_and_pd ) 47 | #undef DEF_BIT_AND 48 | 49 | // Bit OR 50 | // --------------------------------------------------------------------------------------- 51 | #define DEF_BIT_OR( TYPE_T, CMD ) \ 52 | template<> inline simd_type< TYPE_T, avx_tag > \ 53 | bit_or< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \ 54 | simd_type< TYPE_T, avx_tag > rhs ) { \ 55 | return CMD( lhs, rhs ); } 56 | 57 | DEF_BIT_OR( int8_t, _mm256_or_si256 ) 58 | DEF_BIT_OR( int16_t, _mm256_or_si256 ) 59 | DEF_BIT_OR( int32_t, _mm256_or_si256 ) 60 | DEF_BIT_OR( int64_t, _mm256_or_si256 ) 61 | DEF_BIT_OR( float, _mm256_or_ps ) 62 | DEF_BIT_OR( double, _mm256_or_pd ) 63 | #undef DEF_BIT_OR 64 | 65 | // Bit XOR 66 | // --------------------------------------------------------------------------------------- 67 | #define DEF_BIT_XOR( TYPE_T, CMD ) \ 68 | template<> inline simd_type< TYPE_T, avx_tag > \ 69 | bit_xor< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \ 70 | simd_type< TYPE_T, avx_tag > rhs ) { \ 71 | return CMD( lhs, rhs ); } 72 | 73 | DEF_BIT_XOR( int8_t, _mm256_xor_si256 ) 74 | DEF_BIT_XOR( int16_t, _mm256_xor_si256 ) 75 | DEF_BIT_XOR( int32_t, _mm256_xor_si256 ) 76 | DEF_BIT_XOR( int64_t, 
_mm256_xor_si256 ) 77 | DEF_BIT_XOR( float, _mm256_xor_ps ) 78 | DEF_BIT_XOR( double, _mm256_xor_pd ) 79 | #undef DEF_BIT_XOR 80 | 81 | } // namespace litesimd 82 | 83 | #endif // LITESIMD_HAS_AVX 84 | #endif // LITESIMD_AVX_BITWISE_H 85 | -------------------------------------------------------------------------------- /include/litesimd/detail/helper_macros.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_DETAIL_HELPER_MACROS_H 24 | #define LITESIMD_DETAIL_HELPER_MACROS_H 25 | 26 | // SIMD Functions 27 | // --------------------------------------------------------------------------------------- 28 | #define DEFINE_UNARY_SIMD_FUNCTION( TYPE_T, TAG_T, RET_T, PARAM_T, NAME, CMD ) \ 29 | template<> inline typename simd_type< TYPE_T, TAG_T >::RET_T \ 30 | NAME< TYPE_T, TAG_T >( typename simd_type< TYPE_T, TAG_T>::PARAM_T parm0 ) { \ 31 | return CMD( parm0 ); } 32 | 33 | #define DEFINE_BINARY_SIMD_FUNCTION( TYPE_T, TAG_T, RET_T, PARAM_T, NAME, CMD ) \ 34 | template<> inline typename simd_type< TYPE_T, TAG_T >::RET_T \ 35 | NAME< TYPE_T, TAG_T >( typename simd_type< TYPE_T, TAG_T>::PARAM_T parm0, \ 36 | typename simd_type< TYPE_T, TAG_T>::PARAM_T parm1 ) { \ 37 | return CMD( parm0, parm1 ); } 38 | 39 | // Binary function adaptors 40 | // --------------------------------------------------------------------------------------- 41 | #define DEFINE_BINARY_FUNCTION_SIMD_TYPE_ADAPTOR( NAME, RET_T ) \ 42 | template< typename SimdType_T, \ 43 | typename SimdType_T::simd_value_type* = nullptr > \ 44 | inline typename simd_type< typename SimdType_T::simd_value_type, \ 45 | typename SimdType_T::simd_tag >::RET_T \ 46 | NAME( SimdType_T lhs, SimdType_T rhs ) { \ 47 | return NAME< typename SimdType_T::simd_value_type, \ 48 | typename SimdType_T::simd_tag >( lhs, rhs ); } 49 | 50 | #define DEFINE_BINARY_FUNCTION_LEFT_VALUE_ADAPTOR( NAME, RET_T ) \ 51 | template< typename ValueType_T, typename Tag_T = default_tag > \ 52 | inline typename simd_type< ValueType_T, Tag_T >::RET_T \ 53 | NAME( ValueType_T lhs, simd_type< ValueType_T, Tag_T > rhs ) { \ 54 | return NAME< ValueType_T, Tag_T >( simd_type< ValueType_T, Tag_T >( lhs ), rhs ); } 55 | 56 | #define DEFINE_BINARY_FUNCTION_RIGHT_VALUE_ADAPTOR( NAME, RET_T ) \ 57 | template< typename ValueType_T, typename Tag_T = default_tag > \ 58 | inline typename simd_type< ValueType_T, Tag_T >::RET_T \ 59 | NAME( simd_type< 
ValueType_T, Tag_T > lhs, ValueType_T rhs ) { \ 60 | return NAME< ValueType_T, Tag_T >( lhs, simd_type< ValueType_T, Tag_T >( rhs ) ); } 61 | 62 | #define DEFINE_BINARY_FUNCTION_ADAPTORS( NAME, RET_T ) \ 63 | DEFINE_BINARY_FUNCTION_SIMD_TYPE_ADAPTOR( NAME, RET_T ) \ 64 | DEFINE_BINARY_FUNCTION_LEFT_VALUE_ADAPTOR( NAME, RET_T ) \ 65 | DEFINE_BINARY_FUNCTION_RIGHT_VALUE_ADAPTOR( NAME, RET_T ) 66 | 67 | #endif // LITESIMD_DETAIL_HELPER_MACROS_H 68 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: trusty 2 | language: cpp 3 | sudo: false 4 | 5 | common_sources: &all_sources 6 | - ubuntu-toolchain-r-test 7 | - llvm-toolchain-trusty 8 | - llvm-toolchain-trusty-3.9 9 | - llvm-toolchain-trusty-4.0 10 | - llvm-toolchain-trusty-5.0 11 | - llvm-toolchain-trusty-6.0 12 | - sourceline: 'ppa:mhier/libboost-latest' 13 | 14 | matrix: 15 | exclude: # On OSX g++ is a symlink to clang++ by default 16 | - os: osx 17 | compiler: gcc 18 | 19 | include: 20 | - os: linux 21 | compiler: gcc 22 | addons: 23 | apt: 24 | sources: *all_sources 25 | packages: ['gcc-7', 'g++-7', 'cmake', 'boost1.67'] 26 | env: MYCC='gcc-7' MYCXX='g++-7' 27 | 28 | - os: linux 29 | compiler: gcc 30 | addons: 31 | apt: 32 | sources: *all_sources 33 | packages: ['gcc-6', 'g++-6', 'cmake', 'boost1.67'] 34 | env: MYCC='gcc-6' MYCXX='g++-6' 35 | 36 | - os: linux 37 | compiler: gcc 38 | addons: 39 | apt: 40 | sources: *all_sources 41 | packages: ['gcc-5', 'g++-5', 'cmake', 'boost1.67'] 42 | env: MYCC='gcc-5' MYCXX='g++-5' 43 | 44 | - os: linux 45 | compiler: gcc 46 | addons: 47 | apt: 48 | sources: *all_sources 49 | packages: ['gcc-4.9', 'g++-4.9', 'cmake', 'boost1.67'] 50 | env: MYCC='gcc-4.9' MYCXX='g++-4.9' 51 | 52 | - os: linux 53 | compiler: gcc 54 | addons: 55 | apt: 56 | sources: *all_sources 57 | packages: ['gcc-4.8', 'g++-4.8', 'cmake', 'boost1.67'] 58 | env: MYCC='gcc-4.8' 
MYCXX='g++-4.8' 59 | 60 | - os: linux 61 | compiler: clang 62 | addons: 63 | apt: 64 | sources: *all_sources 65 | packages: ['clang-6.0', 'libstdc++-6-dev', 'cmake', 'boost1.67'] 66 | env: MYCC='clang-6.0' MYCXX='clang++-6.0' 67 | 68 | - os: linux 69 | compiler: clang 70 | addons: 71 | apt: 72 | sources: *all_sources 73 | packages: ['clang-5.0', 'libstdc++-5-dev', 'cmake', 'boost1.67'] 74 | env: MYCC='clang-5.0' MYCXX='clang++-5.0' 75 | 76 | - os: linux 77 | compiler: clang 78 | addons: 79 | apt: 80 | sources: *all_sources 81 | packages: ['clang-4.0', 'libstdc++-4.8-dev', 'cmake', 'boost1.67'] 82 | env: MYCC='clang-4.0' MYCXX='clang++-4.0' 83 | 84 | - os: osx 85 | osx_image: xcode9.4 86 | compiler: clang 87 | env: MYCC='clang' MYCXX='clang++' 88 | 89 | - os: osx 90 | osx_image: xcode9.3 91 | compiler: clang 92 | env: MYCC='clang' MYCXX='clang++' 93 | 94 | - os: osx 95 | osx_image: xcode9.2 96 | compiler: clang 97 | env: MYCC='clang' MYCXX='clang++' 98 | 99 | - os: osx 100 | osx_image: xcode9.1 101 | compiler: clang 102 | env: MYCC='clang' MYCXX='clang++' 103 | 104 | - os: osx 105 | osx_image: xcode9 106 | compiler: clang 107 | env: MYCC='clang' MYCXX='clang++' 108 | 109 | - os: osx 110 | osx_image: xcode8.3 111 | compiler: clang 112 | env: MYCC='clang' MYCXX='clang++' 113 | 114 | before_script: 115 | - | 116 | if [[ "${TRAVIS_OS_NAME}" == "linux" ]]; then 117 | CMAKE_URL="https://cmake.org/files/v3.12/cmake-3.12.2-Linux-x86_64.tar.gz" 118 | mkdir cmake && travis_retry wget --no-check-certificate --quiet -O - ${CMAKE_URL} | tar --strip-components=1 -xz -C cmake 119 | export PATH=${PWD}/cmake/bin:${PATH} 120 | fi 121 | 122 | script: 123 | - export CC=${MYCC} 124 | - export CXX=${MYCXX} 125 | - cmake --version 126 | - ${CXX} -v 127 | - (mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && cmake --build . && ctest -V .) 
128 | -------------------------------------------------------------------------------- /include/litesimd/intravector.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_INTRAVECTOR_H 24 | #define LITESIMD_INTRAVECTOR_H 25 | 26 | #include 27 | 28 | namespace litesimd { 29 | 30 | /** 31 | * \defgroup intravector Intravector operations 32 | * 33 | * In litesimd, the intravector group has functions which operate between 34 | * the values of one SIMD register. 35 | * 36 | * All these functions are accessible at `<litesimd/intravector.h>` 37 | */ 38 | 39 | /** 40 | * \ingroup intravector 41 | * \brief Apply a generic SIMD binary function to reduce all SIMD values to a single one.
42 | * 43 | * The SIMD binary function should receive 2 simd_type and return the same type. 44 | * Lambda functions could be used as well. 45 | * 46 | * ```{.cpp} 47 | * ls::t_int32_simd vec( 1 ); 48 | * ls::horizontal( vec, []( ls::t_int32_simd x, ls::t_int32_simd y ) 49 | * { 50 | * return (x ^ y) | 1; 51 | * } ); 52 | * ``` 53 | * 54 | * \param vec SIMD register to be reduced 55 | * \param func SIMD binary function 56 | * \tparam ValueType_T Base type of original SIMD register 57 | * \tparam Function_T Binary function type 58 | * \returns The result of reduction 59 | * 60 | * **Example** 61 | * ```{.cpp} 62 | * #include 63 | * #include 64 | * #include 65 | * #include 66 | * 67 | * int main() 68 | * { 69 | * namespace ls = litesimd; 70 | * 71 | * using func_t = ls::t_int32_simd(*)(ls::t_int32_simd, ls::t_int32_simd); 72 | * 73 | * ls::t_int32_simd x( 1, 2, 3, 4 ); 74 | * std::cout << "horizontal( x, add ): " 75 | * << ls::horizontal( x, static_cast< func_t >(ls::add< int32_t >) ) 76 | * << std::endl; 77 | * return 0; 78 | * } 79 | * ``` 80 | * Output on a SSE compilation 81 | * ``` 82 | * horizontal( x, add ): 10 83 | * ``` 84 | */ 85 | template< typename ValueType_T, typename Function_T, typename Tag_T = default_tag > 86 | inline ValueType_T horizontal( simd_type< ValueType_T, Tag_T > vec, Function_T func ) 87 | { 88 | return intravector_op< ValueType_T, Tag_T >()( vec, func ); 89 | } 90 | 91 | template< typename SimdType_T, typename Function_T, 92 | typename SimdType_T::simd_value_type* = nullptr > 93 | inline typename SimdType_T::simd_value_type 94 | horizontal( SimdType_T vec, Function_T func ) 95 | { 96 | return horizontal< typename SimdType_T::simd_value_type, Function_T, 97 | typename SimdType_T::simd_tag >( vec, func ); 98 | } 99 | 100 | } // namespace litesimd 101 | 102 | #endif // LITESIMD_INTRAVECTOR_H 103 | -------------------------------------------------------------------------------- /include/litesimd/arithmetic.h: 
-------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_ARITHMETIC_H 24 | #define LITESIMD_ARITHMETIC_H 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | namespace litesimd { 31 | 32 | /** 33 | * \defgroup arithmetic Arithmetic operations 34 | * 35 | * In litesimd, arithmetic group has binary functions to execute the basic arithmetic operations 36 | * (addition, subtraction, multiplication and division). 
37 | * 38 | * All these functions are accessible at `<litesimd/arithmetic.h>` 39 | */ 40 | 41 | // Basic operations 42 | // --------------------------------------------------------------------------------------- 43 | DEFINE_BINARY_FUNCTION_ADAPTORS( add, type ) 44 | DEFINE_BINARY_FUNCTION_ADAPTORS( sub, type ) 45 | DEFINE_BINARY_FUNCTION_ADAPTORS( mullo, type ) 46 | DEFINE_BINARY_FUNCTION_ADAPTORS( mulhi, type ) 47 | DEFINE_BINARY_FUNCTION_ADAPTORS( div, type ) 48 | 49 | /** 50 | * \ingroup arithmetic 51 | * \brief Returns the sum of two values. 52 | * 53 | * \param lhs, rhs Values to be added. 54 | * \returns Sum of the two values. 55 | * 56 | * **Example** 57 | * ```{.cpp} 58 | * #include 59 | * #include 60 | * #include 61 | * #include 62 | * 63 | * int main() 64 | * { 65 | * namespace ls = litesimd; 66 | * 67 | * ls::t_int32_simd a( 1 ), b( 10 ); 68 | * std::cout << "a + b: " << a + b << std::endl; 69 | * return 0; 70 | * } 71 | * ``` 72 | * Output on an SSE compilation 73 | * ``` 74 | * a + b: (11, 11, 11, 11) 75 | * ``` 76 | */ 77 | template< typename LHS, typename RHS > 78 | inline auto operator+( LHS lhs, RHS rhs ) -> decltype( add( lhs, rhs ) ) 79 | { 80 | return add( lhs, rhs ); 81 | } 82 | 83 | /** 84 | * \ingroup arithmetic 85 | * \brief Returns the subtraction of two values. 86 | * 87 | * \param lhs, rhs Values to be subtracted. 88 | * \returns Difference of the two values.
89 | * 90 | * **Example** 91 | * ```{.cpp} 92 | * #include 93 | * #include 94 | * #include 95 | * #include 96 | * 97 | * int main() 98 | * { 99 | * namespace ls = litesimd; 100 | * 101 | * ls::t_int32_simd a( 10 ), b( 1 ); 102 | * std::cout << "a - b: " << a - b << std::endl; 103 | * return 0; 104 | * } 105 | * ``` 106 | * Output on a SSE compilation 107 | * ``` 108 | * a - b: (9, 9, 9, 9) 109 | * ``` 110 | */ 111 | template< typename LHS, typename RHS > 112 | inline auto operator-( LHS lhs, RHS rhs ) -> decltype( sub( lhs, rhs ) ) 113 | { 114 | return sub( lhs, rhs ); 115 | } 116 | 117 | } // namespace litesimd 118 | 119 | #endif // LITESIMD_ARITHMETIC_H 120 | 121 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/sse/arithmetic.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_SSE_ARITHMETIC_H 24 | #define LITESIMD_SSE_ARITHMETIC_H 25 | 26 | #ifdef LITESIMD_HAS_SSE 27 | 28 | #include 29 | #include 30 | 31 | namespace litesimd { 32 | 33 | // Add 34 | // --------------------------------------------------------------------------------------- 35 | #define DEF_ADD( TYPE_T, CMD ) \ 36 | template<> inline simd_type< TYPE_T, sse_tag > \ 37 | add< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \ 38 | simd_type< TYPE_T, sse_tag > rhs ) { \ 39 | return CMD( lhs, rhs ); } 40 | 41 | DEF_ADD( int8_t, _mm_add_epi8 ) 42 | DEF_ADD( int16_t, _mm_add_epi16 ) 43 | DEF_ADD( int32_t, _mm_add_epi32 ) 44 | DEF_ADD( int64_t, _mm_add_epi64 ) 45 | DEF_ADD( float, _mm_add_ps ) 46 | DEF_ADD( double, _mm_add_pd ) 47 | #undef DEF_ADD 48 | 49 | // Sub 50 | // --------------------------------------------------------------------------------------- 51 | #define DEF_SUB( TYPE_T, CMD ) \ 52 | template<> inline simd_type< TYPE_T, sse_tag > \ 53 | sub< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \ 54 | simd_type< TYPE_T, sse_tag > rhs ) { \ 55 | return CMD( lhs, rhs ); } 56 | 57 | DEF_SUB( int8_t, _mm_sub_epi8 ) 58 | DEF_SUB( int16_t, _mm_sub_epi16 ) 59 | DEF_SUB( int32_t, _mm_sub_epi32 ) 60 | DEF_SUB( int64_t, _mm_sub_epi64 ) 61 | DEF_SUB( float, _mm_sub_ps ) 62 | DEF_SUB( double, _mm_sub_pd ) 63 | #undef DEF_SUB 64 | 65 | // MulLo 66 | // --------------------------------------------------------------------------------------- 67 | #define DEF_MULLO( TYPE_T, CMD ) \ 68 | template<> inline simd_type< TYPE_T, sse_tag > \ 69 | mullo< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \ 70 | simd_type< TYPE_T, sse_tag > rhs ) { \ 71 | return CMD( lhs, 
rhs ); } 72 | 73 | DEF_MULLO( int16_t, _mm_mullo_epi16 ) 74 | DEF_MULLO( int32_t, _mm_mullo_epi32 ) 75 | #undef DEF_MULLO 76 | 77 | // MulHi 78 | // --------------------------------------------------------------------------------------- 79 | #define DEF_MULHI( TYPE_T, CMD ) \ 80 | template<> inline simd_type< TYPE_T, sse_tag > \ 81 | mulhi< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \ 82 | simd_type< TYPE_T, sse_tag > rhs ) { \ 83 | return CMD( lhs, rhs ); } 84 | 85 | DEF_MULHI( int16_t, _mm_mulhi_epi16 ) 86 | #undef DEF_MULHI 87 | 88 | // Div 89 | // --------------------------------------------------------------------------------------- 90 | #define DEF_DIV( TYPE_T, CMD ) \ 91 | template<> inline simd_type< TYPE_T, sse_tag > \ 92 | div< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \ 93 | simd_type< TYPE_T, sse_tag > rhs ) { \ 94 | return CMD( lhs, rhs ); } 95 | 96 | DEF_DIV( float, _mm_div_ps ) 97 | DEF_DIV( double, _mm_div_pd ) 98 | #undef DEF_DIV 99 | 100 | } // namespace litesimd 101 | 102 | #endif // LITESIMD_HAS_SSE 103 | #endif // LITESIMD_SSE_ARITHMETIC_H 104 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/avx/arithmetic.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions 
of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_AVX_ARITHMETIC_H 24 | #define LITESIMD_AVX_ARITHMETIC_H 25 | 26 | #ifdef LITESIMD_HAS_AVX 27 | 28 | #include 29 | #include 30 | 31 | namespace litesimd { 32 | 33 | // Add 34 | // --------------------------------------------------------------------------------------- 35 | #define DEF_ADD( TYPE_T, CMD ) \ 36 | template<> inline simd_type< TYPE_T, avx_tag > \ 37 | add< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \ 38 | simd_type< TYPE_T, avx_tag > rhs ) { \ 39 | return CMD( lhs, rhs ); } 40 | 41 | DEF_ADD( int8_t, _mm256_add_epi8 ) 42 | DEF_ADD( int16_t, _mm256_add_epi16 ) 43 | DEF_ADD( int32_t, _mm256_add_epi32 ) 44 | DEF_ADD( int64_t, _mm256_add_epi64 ) 45 | DEF_ADD( float, _mm256_add_ps ) 46 | DEF_ADD( double, _mm256_add_pd ) 47 | #undef DEF_ADD 48 | 49 | // Sub 50 | // --------------------------------------------------------------------------------------- 51 | #define DEF_SUB( TYPE_T, CMD ) \ 52 | template<> inline simd_type< TYPE_T, avx_tag > \ 53 | sub< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \ 54 | simd_type< TYPE_T, avx_tag > rhs ) { \ 55 | return CMD( lhs, rhs ); } 56 | 57 | DEF_SUB( int8_t, _mm256_sub_epi8 ) 58 | DEF_SUB( int16_t, _mm256_sub_epi16 ) 59 | DEF_SUB( int32_t, _mm256_sub_epi32 ) 60 | DEF_SUB( int64_t, _mm256_sub_epi64 ) 61 | DEF_SUB( float, _mm256_sub_ps ) 62 | DEF_SUB( double, _mm256_sub_pd ) 63 | #undef DEF_SUB 64 | 65 | // MulLo 66 | // 
--------------------------------------------------------------------------------------- 67 | #define DEF_MULLO( TYPE_T, CMD ) \ 68 | template<> inline simd_type< TYPE_T, avx_tag > \ 69 | mullo< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \ 70 | simd_type< TYPE_T, avx_tag > rhs ) { \ 71 | return CMD( lhs, rhs ); } 72 | 73 | DEF_MULLO( int16_t, _mm256_mullo_epi16 ) 74 | DEF_MULLO( int32_t, _mm256_mullo_epi32 ) 75 | #undef DEF_MULLO 76 | 77 | // MulHi 78 | // --------------------------------------------------------------------------------------- 79 | #define DEF_MULHI( TYPE_T, CMD ) \ 80 | template<> inline simd_type< TYPE_T, avx_tag > \ 81 | mulhi< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \ 82 | simd_type< TYPE_T, avx_tag > rhs ) { \ 83 | return CMD( lhs, rhs ); } 84 | 85 | DEF_MULHI( int16_t, _mm256_mulhi_epi16 ) 86 | #undef DEF_MULHI 87 | 88 | // Div 89 | // --------------------------------------------------------------------------------------- 90 | #define DEF_DIV( TYPE_T, CMD ) \ 91 | template<> inline simd_type< TYPE_T, avx_tag > \ 92 | div< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \ 93 | simd_type< TYPE_T, avx_tag > rhs ) { \ 94 | return CMD( lhs, rhs ); } 95 | 96 | DEF_DIV( float, _mm256_div_ps ) 97 | DEF_DIV( double, _mm256_div_pd ) 98 | #undef DEF_DIV 99 | 100 | } // namespace litesimd 101 | 102 | #endif // LITESIMD_HAS_AVX 103 | #endif // LITESIMD_AVX_ARITHMETIC_H 104 | -------------------------------------------------------------------------------- /samples/greater/greater.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | // Compiled with 24 | // 25 | // g++ -std=c++14 -O3 -mavx2 -I<path-to-litesimd>/include greater.cpp -o greater 26 | 27 | #include 28 | #include 29 | #include 30 | 31 | int main() 32 | { 33 | namespace ls = litesimd; 34 | 35 | // int32_t vector in default instruction set (SSE/AVX) 36 | ls::t_int32_simd cmp; 37 | 38 | // litesimd types are compatible with the compiler intrinsics 39 | #ifdef LITESIMD_HAS_AVX 40 | cmp = _mm256_set_epi32( 80, 70, 60, 50, 40, 30, 20, 10 ); 41 | #else 42 | // No AVX, using SSE 43 | cmp = _mm_set_epi32( 40, 30, 20, 10 ); 44 | #endif // LITESIMD_HAS_AVX 45 | 46 | int32_t val = 5; 47 | 48 | // t_int32_simd::simd_size is how many int32_t values fit in a t_int32_simd (4 - SSE, 8 - AVX) 49 | for( size_t i = 0; i <= ls::t_int32_simd::simd_size; ++i ) 50 | { 51 | // Compare 'val' against all 'cmp' values 52 | uint32_t mask = ls::greater_bitmask( val, cmp ); 53 | 54 | // As 'cmp' is sorted, we can use the bitmask to find the 55 | // last item that 'val' is greater than 56 | // 57 | // Returns values between [-1, ls::t_int32_simd::simd_size) 58 | int index = ls::bitmask_last_index< int32_t >( mask ); 59 | 60 | // greater_last_index could be called instead of 61 | //
greater_bitmask + bitmask_last_index 62 | // 63 | // int index = ls::greater_last_index( val, cmp ); 64 | 65 | if( index < 0 ) 66 | { 67 | std::cout << "The value " << val 68 | << " is less than all values of " << cmp 69 | << std::endl; 70 | } 71 | else if( index == ls::t_int32_simd::simd_size-1 ) 72 | { 73 | std::cout << "The value " << val 74 | << " is greater than all values of " << cmp 75 | << std::endl; 76 | } 77 | else 78 | { 79 | std::cout << "The value " << val 80 | << " is between items " << index 81 | << " and " << index + 1 82 | << " of " << cmp 83 | << std::endl; 84 | } 85 | 86 | val += 10; 87 | } 88 | return 0; 89 | } 90 | 91 | // The output on AVX will be 92 | // 93 | // $ ./greater 94 | // The value 5 is less than all values of (80, 70, 60, 50, 40, 30, 20, 10) 95 | // The value 15 is between items 0 and 1 of (80, 70, 60, 50, 40, 30, 20, 10) 96 | // The value 25 is between items 1 and 2 of (80, 70, 60, 50, 40, 30, 20, 10) 97 | // The value 35 is between items 2 and 3 of (80, 70, 60, 50, 40, 30, 20, 10) 98 | // The value 45 is between items 3 and 4 of (80, 70, 60, 50, 40, 30, 20, 10) 99 | // The value 55 is between items 4 and 5 of (80, 70, 60, 50, 40, 30, 20, 10) 100 | // The value 65 is between items 5 and 6 of (80, 70, 60, 50, 40, 30, 20, 10) 101 | // The value 75 is between items 6 and 7 of (80, 70, 60, 50, 40, 30, 20, 10) 102 | // The value 85 is greater than all values of (80, 70, 60, 50, 40, 30, 20, 10) 103 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/sse/intravector.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | //
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_ARCH_SSE_INTRAVECTOR_H 24 | #define LITESIMD_ARCH_SSE_INTRAVECTOR_H 25 | 26 | #ifdef LITESIMD_HAS_SSE 27 | 28 | #include 29 | 30 | namespace litesimd { 31 | 32 | template<> 33 | struct intravector_op< int8_t, sse_tag > 34 | { 35 | template< typename Function_T > 36 | int8_t inline operator()( simd_type< int8_t, sse_tag > vec, Function_T func ) 37 | { 38 | vec = func( vec, _mm_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); 39 | vec = func( vec, _mm_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); 40 | vec = func( vec, _mm_shufflelo_epi16( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); 41 | vec = func( vec, _mm_srli_epi16( vec, 8 ) ); 42 | return (int8_t)_mm_cvtsi128_si32( vec ); 43 | } 44 | }; 45 | 46 | template<> 47 | struct intravector_op< int16_t, sse_tag > 48 | { 49 | template< typename Function_T > 50 | int16_t inline operator()( simd_type< int16_t, sse_tag > vec, Function_T func ) 51 | { 52 | vec = func( vec, _mm_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); 53 | vec = func( vec, _mm_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); 54 | vec = func( vec, 
_mm_shufflelo_epi16( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); 55 | return (int16_t)_mm_cvtsi128_si32( vec ); 56 | } 57 | }; 58 | 59 | template<> 60 | struct intravector_op< int32_t, sse_tag > 61 | { 62 | template< typename Function_T > 63 | int32_t inline operator()( simd_type< int32_t, sse_tag > vec, Function_T func ) 64 | { 65 | vec = func( vec, _mm_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); 66 | vec = func( vec, _mm_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); 67 | return _mm_cvtsi128_si32( vec ); 68 | } 69 | }; 70 | 71 | template<> 72 | struct intravector_op< int64_t, sse_tag > 73 | { 74 | template< typename Function_T > 75 | int64_t inline operator()( simd_type< int64_t, sse_tag > vec, Function_T func ) 76 | { 77 | vec = func( vec, _mm_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); 78 | return _mm_cvtsi128_si64( vec ); 79 | } 80 | }; 81 | 82 | template<> 83 | struct intravector_op< float, sse_tag > 84 | { 85 | template< typename Function_T > 86 | float inline operator()( simd_type< float, sse_tag > vec, Function_T func ) 87 | { 88 | vec = func( vec, _mm_shuffle_ps( vec, vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); 89 | vec = func( vec, _mm_shuffle_ps( vec, vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); 90 | return _mm_cvtss_f32( vec ); 91 | } 92 | }; 93 | 94 | template<> 95 | struct intravector_op< double, sse_tag > 96 | { 97 | template< typename Function_T > 98 | double inline operator()( simd_type< double, sse_tag > vec, Function_T func ) 99 | { 100 | vec = func( vec, _mm_shuffle_pd( vec, vec, _MM_SHUFFLE2( 0, 1 ) ) ); 101 | return _mm_cvtsd_f64( vec ); 102 | } 103 | }; 104 | 105 | } // namespace litesimd 106 | 107 | #endif // LITESIMD_HAS_SSE 108 | #endif // LITESIMD_ARCH_SSE_INTRAVECTOR_H 109 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/common/shuffle.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André 
Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_ARCH_COMMON_SHUFFLE_H 24 | #define LITESIMD_ARCH_COMMON_SHUFFLE_H 25 | 26 | #include 27 | 28 | namespace litesimd { 29 | 30 | // High/Low insert 31 | // --------------------------------------------------------------------------------------- 32 | template< typename ValueType_T, typename Tag_T = default_tag > 33 | inline simd_type< ValueType_T, Tag_T > 34 | high_insert( simd_type< ValueType_T, Tag_T >, ValueType_T = 0 ){} 35 | 36 | template< typename ValueType_T, typename Tag_T = default_tag > 37 | inline simd_type< ValueType_T, Tag_T > 38 | low_insert( simd_type< ValueType_T, Tag_T >, ValueType_T = 0 ){} 39 | 40 | /** 41 | * \ingroup shuffle 42 | * \brief Combine two SIMD registers using a mask to choose the values. 
43 | * 44 | * The **blend** operation combines 2 SIMD registers using a mask, such as one returned by 45 | * the compare functions, to select each value. 46 | * 47 | * Example of blend function on SSE and int32_t 48 | * 49 | * | Index | 3 | 2 | 1 | 0 | 50 | * | :--- | :--: | :--: | :--: | :--: | 51 | * | litesimd::t_int32_simd **mask** | 0x00000000 | 0xFFFFFFFF | 0xFFFFFFFF | 0x00000000 | 52 | * | litesimd::t_int32_simd X( 1 ); | 1 | 1 | 1 | 1 | 53 | * | litesimd::t_int32_simd Y( 2 ); | 2 | 2 | 2 | 2 | 54 | * | litesimd::blend( mask, X, Y ); | 2 | 1 | 1 | 2 | 55 | * 56 | * \param mask Mask to select the values 57 | * \param trueVal Values selected when mask is true 58 | * \param falseVal Values selected when mask is false 59 | * \tparam ValueType_T Base type of original SIMD register 60 | * \returns Combined SIMD register 61 | * 62 | * **Example** 63 | * ```{.cpp} 64 | * #include 65 | * #include 66 | * #include 67 | * #include 68 | * 69 | * int main() 70 | * { 71 | * namespace ls = litesimd; 72 | * 73 | * ls::t_int32_simd mask( 0x00000000, 0xffffffff, 0Xffffffff, 0x00000000 ); 74 | * ls::t_int32_simd val1( 1 ); 75 | * ls::t_int32_simd val2( 2 ); 76 | * std::cout << "blend( mask, val1, val2 ): " 77 | * << ls::blend( mask, val1, val2 ) << std::endl; 78 | * return 0; 79 | * } 80 | * ``` 81 | * Output on an SSE compilation 82 | * ``` 83 | * blend( mask, val1, val2 ): (2, 1, 1, 2) 84 | * ``` 85 | */ 86 | template< typename ValueType_T, typename Tag_T = default_tag > 87 | inline simd_type< ValueType_T, Tag_T > 88 | blend( simd_type< ValueType_T, Tag_T > mask, 89 | simd_type< ValueType_T, Tag_T > trueVal, 90 | simd_type< ValueType_T, Tag_T > falseVal ){} 91 | 92 | // Get Set 93 | // --------------------------------------------------------------------------------------- 94 | template< int index, typename ValueType_T, typename Tag_T = default_tag > 95 | struct get_functor 96 | { 97 | inline ValueType_T operator()( simd_type< ValueType_T, Tag_T > ){} 98 | }; 99 | 100 | template< int index, 
typename ValueType_T, typename Tag_T = default_tag > 101 | struct set_functor 102 | { 103 | inline simd_type< ValueType_T, Tag_T > operator()( simd_type< ValueType_T, Tag_T >, ValueType_T ){} 104 | }; 105 | 106 | } // namespace litesimd 107 | 108 | #endif // LITESIMD_ARCH_COMMON_SHUFFLE_H 109 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/common/algorithm.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_ARCH_COMMON_ALGORITHM_H 24 | #define LITESIMD_ARCH_COMMON_ALGORITHM_H 25 | 26 | #include 27 | 28 | namespace litesimd { 29 | 30 | // Min max 31 | // --------------------------------------------------------------------------------------- 32 | /** 33 | * \ingroup algorithm 34 | * \brief Compares each value inside the SIMD register and returns the lesser of each pair. 35 | * 36 | * Each value inside the packed SIMD register is compared independently and the result is 37 | * the lesser of the two values on the same index inside the packed SIMD register. 38 | * 39 | * | Index | 3 | 2 | 1 | 0 | 40 | * | :--- | :--: | :--: | :--: | :--: | 41 | * | Register X | a | b | c | d | 42 | * | Register Y | e | f | g | h | 43 | * | litesimd::min( X, Y ) | min( a, e ) | min( b, f ) | min( c, g ) | min( d, h ) | 44 | * 45 | * \param lhs, rhs SIMD registers to compare 46 | * \returns SIMD register with the lesser of each packed value 47 | * 48 | * **Example** 49 | * ```{.cpp} 50 | * #include 51 | * #include 52 | * #include 53 | * #include 54 | * 55 | * int main() 56 | * { 57 | * namespace ls = litesimd; 58 | * ls::t_int32_simd a( 4, 1, 3, 2 ), b( 2, 2, 1, 4 ); 59 | * std::cout << "min( a, b ): " << ls::min( a, b ) << std::endl; 60 | * return 0; 61 | * } 62 | * ``` 63 | * Output on an SSE compilation 64 | * ``` 65 | * min( a, b ): (2, 1, 1, 2) 66 | * ``` 67 | * 68 | * \see max 69 | */ 70 | template< typename ValueType_T, typename Tag_T > inline simd_type< ValueType_T, Tag_T > 71 | min( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){} 72 | 73 | /** 74 | * \ingroup algorithm 75 | * \brief Compares each value inside the SIMD register and returns the greater of each pair. 76 | * 77 | * Each value inside the packed SIMD register is compared independently and the result is 78 | * the greater of the two values on the same index inside the packed SIMD register. 
79 | * 80 | * | Index | 3 | 2 | 1 | 0 | 81 | * | :--- | :--: | :--: | :--: | :--: | 82 | * | Register X | a | b | c | d | 83 | * | Register Y | e | f | g | h | 84 | * | litesimd::max( X, Y ) | max( a, e ) | max( b, f ) | max( c, g ) | max( d, h ) | 85 | * 86 | * \param lhs, rhs SIMD registers to compare 87 | * \returns SIMD register with the greater of each packed value 88 | * 89 | * **Example** 90 | * ```{.cpp} 91 | * #include 92 | * #include 93 | * #include 94 | * #include 95 | * 96 | * int main() 97 | * { 98 | * namespace ls = litesimd; 99 | * ls::t_int32_simd a( 4, 1, 3, 2 ), b( 2, 2, 1, 4 ); 100 | * std::cout << "max( a, b ): " << ls::max( a, b ) << std::endl; 101 | * return 0; 102 | * } 103 | * ``` 104 | * Output on an SSE compilation 105 | * ``` 106 | * max( a, b ): (4, 2, 3, 4) 107 | * ``` 108 | * 109 | * \see min 110 | */ 111 | template< typename ValueType_T, typename Tag_T > inline simd_type< ValueType_T, Tag_T > 112 | max( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){} 113 | 114 | } // namespace litesimd 115 | 116 | #endif // LITESIMD_ARCH_COMMON_ALGORITHM_H 117 | -------------------------------------------------------------------------------- /include/litesimd/algorithm/minmax.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial 
portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_ALGORITHM_MINMAX_H 24 | #define LITESIMD_ALGORITHM_MINMAX_H 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | namespace litesimd { 31 | 32 | /** 33 | * \ingroup algorithm 34 | * \brief Returns the smallest of the values in the SIMD register. 35 | * 36 | * \param vec SIMD register to compare 37 | * \return The smallest of the values in the SIMD register 38 | * 39 | * **Example** 40 | * ```{.cpp} 41 | * #include 42 | * #include 43 | * #include 44 | * 45 | * int main() 46 | * { 47 | * namespace ls = litesimd; 48 | * std::cout << "min( zero ) == " << ls::min( ls::t_int32_simd::zero() ) << std::endl; 49 | * std::cout << "min( iota ) == " << ls::min( ls::iota< int32_t >( 5 ) ) << std::endl; 50 | * std::cout << "min( 4,3,2,1 ) == " << ls::min( ls::simd_type< int32_t, ls::sse_tag >( 4, 3, 2, 1 ) ) << std::endl; 51 | * return 0; 52 | * } 53 | * ``` 54 | * Output on an SSE compilation 55 | * ``` 56 | * min( zero ) == 0 57 | * min( iota ) == 5 58 | * min( 4,3,2,1 ) == 1 59 | * ``` 60 | * 61 | * \see max 62 | */ 63 | template< typename SimdType_T, typename SimdType_T::simd_value_type* = nullptr > 64 | inline typename SimdType_T::simd_value_type min( SimdType_T vec ) 65 | { 66 | using type = typename SimdType_T::simd_value_type; 67 | using tag = typename SimdType_T::simd_tag; 68 | struct op{ inline SimdType_T operator()( SimdType_T lhs, SimdType_T rhs ) 69 | { return min< type, tag >( lhs, rhs ); } }; 70 | 
return intravector_op< type, tag >()( vec, op() ); 71 | } 72 | 73 | /** 74 | * \ingroup algorithm 75 | * \brief Returns the largest of the values in the SIMD register. 76 | * 77 | * \param vec SIMD register to compare 78 | * \return The largest of the values in the SIMD register 79 | * 80 | * **Example** 81 | * ```{.cpp} 82 | * // max example 83 | * #include 84 | * #include 85 | * #include 86 | * 87 | * int main() 88 | * { 89 | * namespace ls = litesimd; 90 | * std::cout << "max( zero ) == " << ls::max( ls::t_int32_simd::zero() ) << std::endl; 91 | * std::cout << "max( iota ) == " << ls::max( ls::iota< int32_t, ls::sse_tag >( 5 ) ) << std::endl; 92 | * std::cout << "max( 4,3,2,1 ) == " << ls::max( ls::simd_type< int32_t, ls::sse_tag >( 4, 3, 2, 1 ) ) << std::endl; 93 | * return 0; 94 | * } 95 | * ``` 96 | * Output on an SSE compilation 97 | * ``` 98 | * max( zero ) == 0 99 | * max( iota ) == 8 100 | * max( 4,3,2,1 ) == 4 101 | * ``` 102 | * 103 | * \see min 104 | */ 105 | template< typename SimdType_T, typename SimdType_T::simd_value_type* = nullptr > 106 | inline typename SimdType_T::simd_value_type max( SimdType_T vec ) 107 | { 108 | using type = typename SimdType_T::simd_value_type; 109 | using tag = typename SimdType_T::simd_tag; 110 | struct op{ inline SimdType_T operator()( SimdType_T lhs, SimdType_T rhs ) 111 | { return max< type, tag >( lhs, rhs ); } }; 112 | return intravector_op< type, tag >()( vec, op() ); 113 | } 114 | 115 | } // namespace litesimd 116 | 117 | #endif // LITESIMD_ALGORITHM_MINMAX_H 118 | -------------------------------------------------------------------------------- /test/shuffle.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without 
restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include "gtest/gtest.h" 28 | 29 | namespace ls = litesimd; 30 | 31 | template class ShuffleTypedTest: public ::testing::Test {}; 32 | 33 | using TestTypes = ::testing::Types< 34 | #ifdef __SSE2__ 35 | std::pair, std::pair, 36 | std::pair, std::pair, 37 | std::pair, std::pair 38 | #ifdef __AVX2__ 39 | , std::pair, std::pair, 40 | std::pair, std::pair, 41 | std::pair, std::pair 42 | #endif //__AVX2__ 43 | #endif //__SSE2__ 44 | >; 45 | TYPED_TEST_CASE(ShuffleTypedTest, TestTypes); 46 | 47 | #ifdef __SSE2__ 48 | TEST(BaseTest, Set1FloatTest) 49 | { 50 | __m128 a = _mm_set_ps( 4.0f, 3.0f, 2.0f, 1.0f ); 51 | EXPECT_FLOAT_EQ( 1.0f, (ls::get<0, float, ls::sse_tag >( a )) ); 52 | 53 | __m128d c = _mm_set_pd( 2.0, 1.0 ); 54 | EXPECT_DOUBLE_EQ( 1.0, (ls::get<0, double, ls::sse_tag >( c )) ); 55 | 56 | #ifdef __AVX2__ 57 | __m256 b = _mm256_set_ps( 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f ); 58 | EXPECT_FLOAT_EQ( 1.0f, (ls::get<0, float, ls::avx_tag >( b )) ); 59 | 60 | 
__m256d d = _mm256_set_pd( 4.0, 3.0, 2.0, 1.0 ); 61 | EXPECT_DOUBLE_EQ( 1.0, (ls::get<0, double, ls::avx_tag >( d )) ); 62 | 63 | #endif //__AVX2__ 64 | } 65 | 66 | TYPED_TEST(ShuffleTypedTest, GetSetTest) 67 | { 68 | using type = typename TypeParam::first_type; 69 | using tag = typename TypeParam::second_type; 70 | using simd = ls::simd_type< type, tag >; 71 | 72 | simd a = simd::zero(); 73 | 74 | EXPECT_EQ( static_cast(0), (ls::get<0, type, tag >( a )) ); 75 | 76 | a = ls::set< 0, type, tag >( a, (type)1 ); 77 | 78 | EXPECT_EQ( static_cast(1), (ls::get<0, type, tag >( a )) ); 79 | } 80 | 81 | TYPED_TEST(ShuffleTypedTest, HighInsertTest) 82 | { 83 | using type = typename TypeParam::first_type; 84 | using tag = typename TypeParam::second_type; 85 | using simd = ls::simd_type< type, tag >; 86 | 87 | simd a = simd::zero(); 88 | a = ls::high_insert( a, 1 ); 89 | EXPECT_EQ( 1, (ls::get< simd::simd_size -1, type, tag> ( a )) ) << "Simd: " << a; 90 | 91 | a = ls::high_insert( ls::iota< type, tag >( 0 ), simd::simd_size ); 92 | 93 | // Weird bug on MacOSX clang 9 94 | // simd_size is constexpr but somehow the EXPECT_EQ macro creates an external symbol to it 95 | // on clang 9. This results in a linker error since there is no simd_size symbol defined. 
96 | auto ssize = simd::simd_size; 97 | EXPECT_EQ( ssize, (ls::get< simd::simd_size -1, type, tag> ( a )) ) << "Simd: " << a; 98 | 99 | ls::for_each( a, [&a]( int index, type val ) 100 | { 101 | EXPECT_EQ( static_cast( index + 1 ), val ) << "Error on index " << index << ", Simd: " << a; 102 | return true; 103 | } ); 104 | } 105 | 106 | TYPED_TEST(ShuffleTypedTest, LowInsertTest) 107 | { 108 | using type = typename TypeParam::first_type; 109 | using tag = typename TypeParam::second_type; 110 | using simd = ls::simd_type< type, tag >; 111 | 112 | simd a = ls::iota< type, tag >( 1 ); 113 | a = ls::low_insert( a, 0 ); 114 | EXPECT_EQ( 0, (ls::get< 0, type, tag> ( a )) ); 115 | ls::for_each( a, []( int index, type val ) 116 | { 117 | EXPECT_EQ( static_cast( index ), val ) << "Error on index " << index; 118 | return true; 119 | } ); 120 | } 121 | #endif // __SSE2__ 122 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/avx/intravector.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_ARCH_AVX_INTRAVECTOR_H 24 | #define LITESIMD_ARCH_AVX_INTRAVECTOR_H 25 | 26 | #ifdef LITESIMD_HAS_AVX 27 | 28 | #include 29 | #include 30 | 31 | namespace litesimd { 32 | 33 | template<> 34 | struct intravector_op< int8_t, avx_tag > 35 | { 36 | template< typename Function_T > 37 | int8_t inline operator()( simd_type< int8_t, avx_tag > vec, Function_T func ) 38 | { 39 | vec = func( vec, _mm256_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); 40 | vec = func( vec, _mm256_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); 41 | vec = func( vec, _mm256_permute4x64_epi64( vec, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); 42 | vec = func( vec, _mm256_shufflelo_epi16( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); 43 | vec = func( vec, _mm256_srli_epi16( vec, 8 ) ); 44 | return (int8_t)_mm_cvtsi128_si32( _mm256_extracti128_si256( vec, 0 ) ); 45 | } 46 | }; 47 | 48 | template<> 49 | struct intravector_op< int16_t, avx_tag > 50 | { 51 | template< typename Function_T > 52 | int16_t inline operator()( simd_type< int16_t, avx_tag > vec, Function_T func ) 53 | { 54 | vec = func( vec, _mm256_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); 55 | vec = func( vec, _mm256_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); 56 | vec = func( vec, _mm256_permute4x64_epi64( vec, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); 57 | vec = func( vec, _mm256_shufflelo_epi16( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); 58 | return (int16_t)_mm_cvtsi128_si32( _mm256_extracti128_si256( vec, 0 ) ); 
59 | } 60 | }; 61 | 62 | template<> 63 | struct intravector_op< int32_t, avx_tag > 64 | { 65 | template< typename Function_T > 66 | int32_t inline operator()( simd_type< int32_t, avx_tag > vec, Function_T func ) 67 | { 68 | vec = func( vec, _mm256_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); 69 | vec = func( vec, _mm256_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); 70 | vec = func( vec, _mm256_permute4x64_epi64( vec, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); 71 | return _mm_cvtsi128_si32( _mm256_extracti128_si256( vec, 0 ) ); 72 | } 73 | }; 74 | 75 | template<> 76 | struct intravector_op< int64_t, avx_tag > 77 | { 78 | template< typename Function_T > 79 | int64_t inline operator()( simd_type< int64_t, avx_tag > vec, Function_T func ) 80 | { 81 | vec = func( vec, _mm256_permute4x64_epi64( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); 82 | vec = func( vec, _mm256_permute4x64_epi64( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); 83 | return _mm256_extract_epi64( vec, 0 ); 84 | } 85 | }; 86 | 87 | template<> 88 | struct intravector_op< float, avx_tag > 89 | { 90 | template< typename Function_T > 91 | float inline operator()( simd_type< float, avx_tag > vec, Function_T func ) 92 | { 93 | vec = func( vec, _mm256_permute2f128_ps( vec, vec, 1 ) ); 94 | vec = func( vec, _mm256_permute_ps( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); 95 | vec = func( vec, _mm256_permute_ps( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); 96 | return _mm256_cvtss_f32( vec ); 97 | } 98 | }; 99 | 100 | template<> 101 | struct intravector_op< double, avx_tag > 102 | { 103 | template< typename Function_T > 104 | double inline operator()( simd_type< double, avx_tag > vec, Function_T func ) 105 | { 106 | vec = func( vec, _mm256_permute4x64_pd( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); 107 | vec = func( vec, _mm256_permute4x64_pd( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); 108 | return _mm256_cvtsd_f64( vec ); 109 | } 110 | }; 111 | 112 | } // namespace litesimd 113 | 114 | #endif // LITESIMD_HAS_AVX 115 | #endif // LITESIMD_ARCH_AVX_INTRAVECTOR_H 116 | 
-------------------------------------------------------------------------------- /test/bitwise.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include "gtest/gtest.h" 29 | 30 | namespace ls = litesimd; 31 | 32 | template class BitwiseTypedTest: public ::testing::Test {}; 33 | 34 | using TestTypes = ::testing::Types< 35 | #ifdef __SSE2__ 36 | std::pair, std::pair, 37 | std::pair, std::pair 38 | #ifdef __AVX2__ 39 | , std::pair, std::pair, 40 | std::pair, std::pair 41 | #endif //__AVX2__ 42 | #endif //__SSE2__ 43 | >; 44 | TYPED_TEST_CASE(BitwiseTypedTest, TestTypes); 45 | 46 | #ifdef __SSE2__ 47 | TYPED_TEST(BitwiseTypedTest, AndTypedTest) 48 | { 49 | using type = typename TypeParam::first_type; 50 | using tag = typename TypeParam::second_type; 51 | using simd = ls::simd_type< type, tag >; 52 | 53 | simd a = simd( 3 ); 54 | simd b = simd( 6 ); 55 | 56 | ls::for_each( ls::bit_and< type, tag >( a, b ), []( int index, type val ) 57 | { 58 | EXPECT_EQ( 2, val ) << "Error on index " << index; 59 | return true; 60 | } ); 61 | 62 | ls::for_each( a & b, []( int index, type val ) 63 | { 64 | EXPECT_EQ( 2, val ) << "Error on index " << index; 65 | return true; 66 | } ); 67 | 68 | a = ls::set<0, type, tag>( a, 6 ); 69 | auto ret = ls::bit_and(a); 70 | EXPECT_EQ( static_cast(6), (ls::get<0, type, tag>(a)) ) << "Simd: " << a; 71 | EXPECT_EQ( static_cast(2), ret ) << "ret, Simd: " << +ret << ", " << a; 72 | } 73 | 74 | TYPED_TEST(BitwiseTypedTest, OrTypedTest) 75 | { 76 | using type = typename TypeParam::first_type; 77 | using tag = typename TypeParam::second_type; 78 | using simd = ls::simd_type< type, tag >; 79 | 80 | simd a = simd( 3 ); 81 | simd b = simd( 6 ); 82 | 83 | ls::for_each( ls::bit_or< type, tag >( a, b ), []( int index, type val ) 84 | { 85 | EXPECT_EQ( 7, val ) << "Error on index " << index; 86 | return true; 87 | } ); 88 | 89 | ls::for_each( a | b, []( int index, type val ) 90 | { 91 | EXPECT_EQ( 7, val ) << "Error on index " << index; 92 | return true; 93 | } ); 94 | 95 | a = ls::set<0, type, tag>( a, 6 ); 96 | EXPECT_EQ( 
static_cast(7), ls::bit_or( a ) ); 97 | } 98 | 99 | TYPED_TEST(BitwiseTypedTest, XorTypedTest) 100 | { 101 | using type = typename TypeParam::first_type; 102 | using tag = typename TypeParam::second_type; 103 | using simd = ls::simd_type< type, tag >; 104 | 105 | simd a = simd( 3 ); 106 | simd b = simd( 6 ); 107 | 108 | ls::for_each( ls::bit_xor< type, tag >( a, b ), []( int index, type val ) 109 | { 110 | EXPECT_EQ( 5, val ) << "Error on index " << index; 111 | return true; 112 | } ); 113 | 114 | ls::for_each( a ^ b, []( int index, type val ) 115 | { 116 | EXPECT_EQ( 5, val ) << "Error on index " << index; 117 | return true; 118 | } ); 119 | 120 | a = ls::set<0, type, tag>( a, 6 ); 121 | EXPECT_EQ( static_cast(5), ls::bit_xor( a ) ); 122 | } 123 | 124 | TYPED_TEST(BitwiseTypedTest, NotTypedTest) 125 | { 126 | using type = typename TypeParam::first_type; 127 | using tag = typename TypeParam::second_type; 128 | using simd = ls::simd_type< type, tag >; 129 | 130 | simd a = simd( 3 ); 131 | 132 | ls::for_each( ls::bit_not< type, tag >( a ), []( int index, type val ) 133 | { 134 | EXPECT_EQ( static_cast( ~3 ), val ) << "Error on index " << index; 135 | return true; 136 | } ); 137 | 138 | ls::for_each( ~a, []( int index, type val ) 139 | { 140 | EXPECT_EQ( static_cast( ~3 ), val ) << "Error on index " << index; 141 | return true; 142 | } ); 143 | } 144 | 145 | #endif //__SSE2__ 146 | -------------------------------------------------------------------------------- /test/intravector.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | 
// copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include "gtest/gtest.h" 28 | 29 | namespace ls = litesimd; 30 | 31 | template class IntravectorTypedTest: public ::testing::Test {}; 32 | 33 | using TestTypes = ::testing::Types< 34 | #ifdef LITESIMD_HAS_SSE 35 | std::pair, std::pair, 36 | std::pair, std::pair, 37 | std::pair, std::pair 38 | #ifdef LITESIMD_HAS_AVX 39 | , std::pair, std::pair, 40 | std::pair, std::pair, 41 | std::pair, std::pair 42 | #endif // LITESIMD_HAS_AVX 43 | #endif // LITESIMD_HAS_SSE 44 | >; 45 | TYPED_TEST_CASE(IntravectorTypedTest, TestTypes); 46 | 47 | #ifdef LITESIMD_HAS_SSE 48 | TYPED_TEST(IntravectorTypedTest, HorizontalAritmeticTest) 49 | { 50 | using type = typename TypeParam::first_type; 51 | using tag = typename TypeParam::second_type; 52 | using simd = ls::simd_type< type, tag >; 53 | using func_t = simd(*)(simd, simd); 54 | 55 | simd a = ls::iota< type, tag >( 1 ); 56 | type sum = 0; 57 | for( size_t i = 0; i < simd::simd_size; ++i ) 58 | { 59 | sum += static_cast( i + 1 ); 60 | } 61 | EXPECT_EQ( sum, ls::horizontal( a, static_cast< func_t >(ls::add< type, tag >) ) ); 62 | } 63 | 64 | 
TYPED_TEST(IntravectorTypedTest, HorizontalLambdaTest) 65 | { 66 | using type = typename TypeParam::first_type; 67 | using tag = typename TypeParam::second_type; 68 | using simd = ls::simd_type< type, tag >; 69 | 70 | simd a = ls::iota< type, tag >( 1 ); 71 | type sum = 0; 72 | for( size_t i = 0; i < simd::simd_size; ++i ) 73 | { 74 | sum += static_cast( i + 2 ); 75 | } 76 | --sum; 77 | auto func = []( simd lhs, simd rhs ) -> simd 78 | { 79 | return ls::add( static_cast< type >( 1 ), ls::add( lhs, rhs ) ); 80 | }; 81 | EXPECT_EQ( sum, ls::horizontal( a, func ) ); 82 | } 83 | 84 | #if (__GNUC__ >= 5) || defined(__clang__) 85 | // Not compatible with old GCCs 86 | TEST(BaseTest, HorizontalIntrincsTest) 87 | { 88 | // Sum ones, because iota will give us an overflow 89 | auto i8 = ls::simd_type< int8_t, ls::sse_tag >( 1 ); 90 | auto i16 = ls::iota< int16_t, ls::sse_tag >( 1 ); 91 | auto i32 = ls::iota< int32_t, ls::sse_tag >( 1 ); 92 | auto i64 = ls::iota< int64_t, ls::sse_tag >( 1 ); 93 | auto f32 = ls::iota< float, ls::sse_tag >( 1 ); 94 | auto f64 = ls::iota< double, ls::sse_tag >( 1 ); 95 | 96 | EXPECT_EQ( 16, ls::horizontal( i8, _mm_add_epi8 ) ); 97 | EXPECT_EQ( 36, ls::horizontal( i16, _mm_add_epi16 ) ); 98 | EXPECT_EQ( 10, ls::horizontal( i32, _mm_add_epi32 ) ); 99 | EXPECT_EQ( 3, ls::horizontal( i64, _mm_add_epi64 ) ); 100 | EXPECT_FLOAT_EQ( 10, ls::horizontal( f32, _mm_add_ps ) ); 101 | EXPECT_DOUBLE_EQ( 3, ls::horizontal( f64, _mm_add_pd ) ); 102 | 103 | #ifdef LITESIMD_HAS_AVX 104 | // Sum ones, because iota will give us an overflow 105 | auto i8a = ls::simd_type< int8_t, ls::avx_tag >( 1 ); 106 | auto i16a = ls::iota< int16_t, ls::avx_tag >( 1 ); 107 | auto i32a = ls::iota< int32_t, ls::avx_tag >( 1 ); 108 | auto i64a = ls::iota< int64_t, ls::avx_tag >( 1 ); 109 | auto f32a = ls::iota< float, ls::avx_tag >( 1 ); 110 | auto f64a = ls::iota< double, ls::avx_tag >( 1 ); 111 | 112 | EXPECT_EQ( 32, ls::horizontal( i8a, _mm256_add_epi8 ) ); 113 | EXPECT_EQ( 136, 
ls::horizontal( i16a, _mm256_add_epi16 ) ); 114 | EXPECT_EQ( 36, ls::horizontal( i32a, _mm256_add_epi32 ) ); 115 | EXPECT_EQ( 10, ls::horizontal( i64a, _mm256_add_epi64 ) ); 116 | EXPECT_FLOAT_EQ( 36, ls::horizontal( f32a, _mm256_add_ps ) ); 117 | EXPECT_DOUBLE_EQ( 10, ls::horizontal( f64a, _mm256_add_pd ) ); 118 | #endif // LITESIMD_HAS_AVX 119 | } 120 | #endif // __GNUC__ 121 | 122 | #endif // LITESIMD_HAS_SSE 123 | -------------------------------------------------------------------------------- /test/arithmetic.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #include 24 | #include 25 | #include 26 | #include "gtest/gtest.h" 27 | 28 | namespace ls = litesimd; 29 | 30 | template class ArithmeticTypedTest: public ::testing::Test {}; 31 | 32 | using TestTypes = ::testing::Types< 33 | #ifdef __SSE2__ 34 | std::pair, std::pair, 35 | std::pair, std::pair, 36 | std::pair, std::pair 37 | #ifdef __AVX2__ 38 | , std::pair, std::pair, 39 | std::pair, std::pair, 40 | std::pair, std::pair 41 | #endif //__AVX2__ 42 | #endif //__SSE2__ 43 | >; 44 | TYPED_TEST_CASE(ArithmeticTypedTest, TestTypes); 45 | 46 | template class ArithmeticTaggedTest: public ::testing::Test {}; 47 | 48 | using TagTypes = ::testing::Types< 49 | #ifdef __SSE2__ 50 | ls::sse_tag 51 | #ifdef __AVX2__ 52 | , ls::avx_tag 53 | #endif //__AVX2__ 54 | #endif //__SSE2__ 55 | >; 56 | 57 | TYPED_TEST_CASE(ArithmeticTaggedTest, TagTypes); 58 | 59 | #ifdef __SSE2__ 60 | TYPED_TEST(ArithmeticTypedTest, AddTypedTest) 61 | { 62 | using type = typename TypeParam::first_type; 63 | using tag = typename TypeParam::second_type; 64 | using simd = ls::simd_type< type, tag >; 65 | 66 | simd a = simd( static_cast(1) ); 67 | simd b = simd( static_cast(2) ); 68 | 69 | ls::for_each( ls::add< type, tag >( a, b ), []( int index, type val ) 70 | { 71 | EXPECT_EQ( static_cast(3), val ) << "Error on index " << index; 72 | return true; 73 | } ); 74 | 75 | ls::for_each( a + b, []( int index, type val ) 76 | { 77 | EXPECT_EQ( static_cast(3), val ) << "Error on index " << index; 78 | return true; 79 | } ); 80 | } 81 | 82 | TYPED_TEST(ArithmeticTypedTest, SubTypedTest) 83 | { 84 | using type = typename TypeParam::first_type; 85 | using tag = typename TypeParam::second_type; 86 | using simd = ls::simd_type< type, tag >; 87 | 88 | simd a = simd( 8 ); 89 | simd b = simd( 3 ); 90 | 91 | ls::for_each( ls::sub< type, tag >( a, b ), []( int index, type val ) 92 | { 93 | EXPECT_EQ( 5, val ) << "Error on index " << index; 94 | return true; 95 | } ); 96 | 97 | ls::for_each( a - b, []( int index, 
type val ) 98 | { 99 | EXPECT_EQ( 5, val ) << "Error on index " << index; 100 | return true; 101 | } ); 102 | } 103 | 104 | TYPED_TEST(ArithmeticTaggedTest, MulLoHiTest) 105 | { 106 | using tag = TypeParam; 107 | using simd16 = ls::simd_type< int16_t, tag >; 108 | using simd32 = ls::simd_type< int32_t, tag >; 109 | 110 | simd16 a = simd16( 0x2003 ); 111 | simd16 b = simd16( 0x30 ); 112 | 113 | ls::for_each( ls::mullo( a, b ), []( int index, int16_t val ) 114 | { 115 | EXPECT_EQ( 0x90, val ) << "Error on index " << index; 116 | return true; 117 | } ); 118 | 119 | ls::for_each( ls::mulhi( a, b ), []( int index, int16_t val ) 120 | { 121 | EXPECT_EQ( 6, val ) << "Error on index " << index; 122 | return true; 123 | } ); 124 | 125 | simd32 d = simd32( 0x20000003 ); 126 | simd32 e = simd32( 0x30 ); 127 | 128 | ls::for_each( ls::mullo( d, e ), []( int index, int32_t val ) 129 | { 130 | EXPECT_EQ( 0x90, val ) << "Error on index " << index; 131 | return true; 132 | } ); 133 | } 134 | 135 | TYPED_TEST(ArithmeticTaggedTest, DivTest) 136 | { 137 | using tag = TypeParam; 138 | using simdf = ls::simd_type< float, tag >; 139 | using simdd = ls::simd_type< double, tag >; 140 | 141 | simdf a = simdf( 20 ); 142 | simdf b = simdf( 2 ); 143 | 144 | ls::for_each( ls::div( a, b ), []( int index, float val ) 145 | { 146 | EXPECT_FLOAT_EQ( 10, val ) << "Error on index " << index; 147 | return true; 148 | } ); 149 | 150 | simdd d = simdd( 20 ); 151 | simdd e = simdd( 2 ); 152 | 153 | ls::for_each( ls::div( d, e ), []( int index, double val ) 154 | { 155 | EXPECT_DOUBLE_EQ( 10, val ) << "Error on index " << index; 156 | return true; 157 | } ); 158 | } 159 | #endif //__SSE2__ 160 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/common/bitwise.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby 
granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_ARCH_COMMON_BITWISE_H 24 | #define LITESIMD_ARCH_COMMON_BITWISE_H 25 | 26 | #include 27 | 28 | namespace litesimd { 29 | 30 | // Basic bitwise operations 31 | // --------------------------------------------------------------------------------------- 32 | 33 | /** 34 | * \ingroup bitwise 35 | * \brief Returns the bitwise AND operation between passed parameters. 36 | * 37 | * \param lhs, rhs SIMD registers to apply bitwise AND operation. 38 | * \returns SIMD register with the result of the operation. 
39 | * 40 | * **Example** 41 | * ```{.cpp} 42 | * #include 43 | * #include 44 | * #include 45 | * #include 46 | * 47 | * int main() 48 | * { 49 | * namespace ls = litesimd; 50 | * 51 | * ls::t_int32_simd a( 3 ), b( 6 ); 52 | * std::cout << "bit_and( a, b ): " << ls::bit_and( a, b ) << std::endl; 53 | * return 0; 54 | * } 55 | * ``` 56 | * Output on a SSE compilation 57 | * ``` 58 | * bit_and( a, b ): (2, 2, 2, 2) 59 | * ``` 60 | */ 61 | template< typename ValueType_T, typename Tag_T = default_tag > 62 | inline simd_type< ValueType_T, Tag_T > 63 | bit_and( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){} 64 | 65 | /** 66 | * \ingroup bitwise 67 | * \brief Returns the bitwise OR operation between passed parameters. 68 | * 69 | * \param lhs, rhs SIMD registers to apply bitwise OR operation. 70 | * \returns SIMD register with the result of the operation. 71 | * 72 | * **Example** 73 | * ```{.cpp} 74 | * #include 75 | * #include 76 | * #include 77 | * #include 78 | * 79 | * int main() 80 | * { 81 | * namespace ls = litesimd; 82 | * 83 | * ls::t_int32_simd a( 3 ), b( 6 ); 84 | * std::cout << "bit_or( a, b ): " << ls::bit_or( a, b ) << std::endl; 85 | * return 0; 86 | * } 87 | * ``` 88 | * Output on a SSE compilation 89 | * ``` 90 | * bit_or( a, b ): (7, 7, 7, 7) 91 | * ``` 92 | */ 93 | template< typename ValueType_T, typename Tag_T = default_tag > 94 | inline simd_type< ValueType_T, Tag_T > 95 | bit_or( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){} 96 | 97 | /** 98 | * \ingroup bitwise 99 | * \brief Returns the bitwise XOR operation between passed parameters. 100 | * 101 | * \param lhs, rhs SIMD registers to apply bitwise XOR operation. 102 | * \returns SIMD register with the result of the operation. 
103 | * 104 | * **Example** 105 | * ```{.cpp} 106 | * #include 107 | * #include 108 | * #include 109 | * #include 110 | * 111 | * int main() 112 | * { 113 | * namespace ls = litesimd; 114 | * 115 | * ls::t_int32_simd a( 3 ), b( 6 ); 116 | * std::cout << "bit_xor( a, b ): " << ls::bit_xor( a, b ) << std::endl; 117 | * return 0; 118 | * } 119 | * ``` 120 | * Output on a SSE compilation 121 | * ``` 122 | * bit_xor( a, b ): (5, 5, 5, 5) 123 | * ``` 124 | */ 125 | template< typename ValueType_T, typename Tag_T = default_tag > 126 | inline simd_type< ValueType_T, Tag_T > 127 | bit_xor( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){} 128 | 129 | /** 130 | * \ingroup bitwise 131 | * \brief Inverts all bits on a SIMD register. 132 | * 133 | * \param vec SIMD register to negate. 134 | * \returns SIMD register with all bit inverted. 135 | * 136 | * **Example** 137 | * ```{.cpp} 138 | * #include 139 | * #include 140 | * #include 141 | * #include 142 | * 143 | * int main() 144 | * { 145 | * namespace ls = litesimd; 146 | * 147 | * ls::t_int32_simd a( 0xfffffff0 ); 148 | * std::cout << "bit_not( a ): " << ls::bit_not( a ) << std::endl; 149 | * return 0; 150 | * } 151 | * ``` 152 | * Output on a SSE compilation 153 | * ``` 154 | * bit_not( a ): (15, 15, 15, 15) 155 | * ``` 156 | */ 157 | template< typename ValueType_T, typename Tag_T = default_tag > 158 | inline simd_type< ValueType_T, Tag_T > 159 | bit_not( simd_type< ValueType_T, Tag_T > vec ) 160 | { 161 | return bit_xor( vec, simd_type< ValueType_T, Tag_T >::ones() ); 162 | } 163 | 164 | } // namespace litesimd 165 | 166 | #endif // LITESIMD_ARCH_COMMON_BITWISE_H 167 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/common/compare.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of 
charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_ARCH_COMMON_COMPARE_H 24 | #define LITESIMD_ARCH_COMMON_COMPARE_H 25 | 26 | #include 27 | #include 28 | 29 | namespace litesimd { 30 | 31 | // Bit scan 32 | // --------------------------------------------------------------------------------------- 33 | template< typename Tag_T = default_tag > 34 | std::pair bit_scan_forward( uint32_t bitmask ){ return std::make_pair( -1, false ); } 35 | 36 | template< typename Tag_T = default_tag > 37 | std::pair bit_scan_reverse( uint32_t bitmask ){ return std::make_pair( -1, false ); } 38 | 39 | /** 40 | * \ingroup compare 41 | * \brief Converts a SIMD mask to a bitmask 42 | * 43 | * \param mask SIMD mask to be converted 44 | * \tparam ValueType_T Base type of original SIMD register 45 | * \returns Bitmask equivalent 46 | * 47 | * **Example** 48 | * ```{.cpp} 49 | * #include 50 | * #include 51 | * #include 52 | * 53 | * int main() 54 | * { 55 | * namespace ls = litesimd; 56 | * 57 | * ls::t_int32_simd x( 9, 8, 7, 6 ); 58 | * ls::t_int32_simd y( 9, 8, 5, 6 ); 59 | * auto mask = ls::equal_to( x, y ); // (0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF) 60 | * std::cout << "mask_to_bitmask< int32_t >( mask ): " << std::hex 61 | * << ls::mask_to_bitmask< int32_t >( mask ) << std::endl; 62 | * return 0; 63 | * } 64 | * ``` 65 | * Output on a SSE compilation 66 | * ``` 67 | * mask_to_bitmask< int32_t >( mask ): ff0f 68 | * ``` 69 | */ 70 | template< typename ValueType_T, typename Tag_T = default_tag > 71 | inline typename simd_type< ValueType_T, Tag_T >::bitmask_type 72 | mask_to_bitmask( simd_type< ValueType_T, Tag_T > mask ) 73 | { 74 | return 0; 75 | } 76 | 77 | /** 78 | * \ingroup compare 79 | * \brief Compares two SIMD registers and returns a mask representing the values of the first parameter is greater than the second parameter 80 | * 81 | * \param lhs, rhs Values to be compared 82 | * \tparam ValueType_T Base type of SIMD register 83 | * \returns Mask representing the values of the lhs parameter which 
are greater than the rhs parameter 84 | * 85 | * **Example** 86 | * ```{.cpp} 87 | * #include 88 | * #include 89 | * #include 90 | * 91 | * int main() 92 | * { 93 | * namespace ls = litesimd; 94 | * 95 | * ls::t_int32_simd x( 9, 8, 7, 6 ); 96 | * ls::t_int32_simd y( 9, 8, 5, 6 ); 97 | * std::cout << "greater( x, y ): " << std::hex << ls::greater( x, y ) << std::endl; 98 | * return 0; 99 | * } 100 | * ``` 101 | * Output on a SSE compilation 102 | * ``` 103 | * greater( x, y ): (0, 0, ffffffff, 0) 104 | * ``` 105 | */ 106 | template< typename ValueType_T, typename Tag_T = default_tag > 107 | inline simd_type< ValueType_T, Tag_T > 108 | greater( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){} 109 | 110 | /** 111 | * \ingroup compare 112 | * \brief Compares two SIMD registers and returns a mask of equal values 113 | * 114 | * \param lhs, rhs Values to be compared 115 | * \tparam ValueType_T Base type of SIMD register 116 | * \returns Mask representing the values of the lhs parameter which are equal to the rhs parameter 117 | * 118 | * **Example** 119 | * ```{.cpp} 120 | * #include 121 | * #include 122 | * #include 123 | * 124 | * int main() 125 | * { 126 | * namespace ls = litesimd; 127 | * 128 | * ls::t_int32_simd x( 9, 8, 7, 6 ); 129 | * ls::t_int32_simd y( 9, 8, 5, 6 ); 130 | * std::cout << "equal_to( x, y ): " << std::hex << ls::equal_to( x, y ) << std::endl; 131 | * return 0; 132 | * } 133 | * ``` 134 | * Output on a SSE compilation 135 | * ``` 136 | * equal_to( x, y ): (ffffffff, ffffffff, 0, ffffffff) 137 | * ``` 138 | */ 139 | template< typename ValueType_T, typename Tag_T = default_tag > 140 | inline simd_type< ValueType_T, Tag_T > 141 | equal_to( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){} 142 | 143 | template< typename ValueType_T, typename Tag_T = default_tag > 144 | struct all_of_op{}; 145 | 146 | template< typename ValueType_T, typename Tag_T = default_tag > 147 | struct all_of_bitmask_op{}; 
148 | 149 | template< typename ValueType_T, typename Tag_T = default_tag > 150 | struct none_of_op{}; 151 | 152 | } // namespace litesimd 153 | 154 | #endif // LITESIMD_ARCH_COMMON_COMPARE_H 155 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/sse/traits.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_ARCH_SSE_TRAITS_H 24 | #define LITESIMD_ARCH_SSE_TRAITS_H 25 | 26 | #ifdef LITESIMD_HAS_SSE 27 | 28 | #include 29 | #include 30 | 31 | namespace litesimd { 32 | 33 | template<> struct traits< int8_t, sse_tag >{ 34 | using simd_type = __m128i; 35 | using bitmask_type = uint32_t; 36 | static inline simd_type zero() { return _mm_setzero_si128(); } 37 | static inline simd_type ones() { return _mm_cmpeq_epi8( zero(), zero() ); } 38 | static inline simd_type from_value( int8_t v ) { return _mm_set1_epi8( v ); } 39 | static inline simd_type from_values( int8_t v15, int8_t v14, int8_t v13, int8_t v12, 40 | int8_t v11, int8_t v10, int8_t v9, int8_t v8, 41 | int8_t v7, int8_t v6, int8_t v5, int8_t v4, 42 | int8_t v3, int8_t v2, int8_t v1, int8_t v0 ) 43 | { 44 | return _mm_set_epi8( v15, v14, v13, v12, v11, v10, v9, v8, 45 | v7, v6, v5, v4, v3, v2, v1, v0 ); 46 | } 47 | static inline simd_type iota() { return from_values( 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ); } 48 | }; 49 | 50 | template<> struct traits< int16_t, sse_tag >{ 51 | using simd_type = __m128i; 52 | using bitmask_type = uint32_t; 53 | static inline simd_type zero() { return _mm_setzero_si128(); } 54 | static inline simd_type ones() { return _mm_cmpeq_epi8( zero(), zero() ); } 55 | static inline simd_type from_value( int16_t v ) { return _mm_set1_epi16( v ); } 56 | static inline simd_type from_values( int16_t v7, int16_t v6, int16_t v5, int16_t v4, 57 | int16_t v3, int16_t v2, int16_t v1, int16_t v0 ) 58 | { 59 | return _mm_set_epi16( v7, v6, v5, v4, v3, v2, v1, v0 ); 60 | } 61 | static inline simd_type iota() { return from_values( 7, 6, 5, 4, 3, 2, 1, 0 ); } 62 | }; 63 | 64 | template<> struct traits< int32_t, sse_tag >{ 65 | using simd_type = __m128i; 66 | using bitmask_type = uint32_t; 67 | static inline simd_type zero() { return _mm_setzero_si128(); } 68 | static inline simd_type ones() { return _mm_cmpeq_epi8( zero(), zero() ); } 69 | static inline simd_type 
from_value( int32_t v ) { return _mm_set1_epi32( v ); } 70 | static inline simd_type from_values( int32_t v3, int32_t v2, int32_t v1, int32_t v0 ) 71 | { 72 | return _mm_set_epi32( v3, v2, v1, v0 ); 73 | } 74 | static inline simd_type iota() { return from_values( 3, 2, 1, 0 ); } 75 | }; 76 | 77 | template<> struct traits< int64_t, sse_tag >{ 78 | using simd_type = __m128i; 79 | using bitmask_type = uint32_t; 80 | static inline simd_type zero() { return _mm_setzero_si128(); } 81 | static inline simd_type ones() { return _mm_cmpeq_epi8( zero(), zero() ); } 82 | static inline simd_type from_value( int64_t v ) { return _mm_set1_epi64x( v ); } 83 | static inline simd_type from_values( int64_t v1, int64_t v0 ) 84 | { 85 | return _mm_set_epi64x( v1, v0 ); 86 | } 87 | static inline simd_type iota() { return from_values( 1, 0 ); } 88 | }; 89 | 90 | template<> struct traits< float, sse_tag >{ 91 | using simd_type = __m128; 92 | using bitmask_type = uint32_t; 93 | static inline simd_type zero() { return _mm_setzero_ps(); } 94 | static inline simd_type ones() { return _mm_cmpeq_ps( zero(), zero() ); } 95 | static inline simd_type from_value( float v ) { return _mm_set1_ps( v ); } 96 | static inline simd_type from_values( float v3, float v2, float v1, float v0 ) 97 | { 98 | return _mm_set_ps( v3, v2, v1, v0 ); 99 | } 100 | static inline simd_type iota() { return from_values( 3, 2, 1, 0 ); } 101 | }; 102 | 103 | template<> struct traits< double, sse_tag >{ 104 | using simd_type = __m128d; 105 | using bitmask_type = uint32_t; 106 | static inline simd_type zero() { return _mm_setzero_pd(); } 107 | static inline simd_type ones() { return _mm_cmpeq_pd( zero(), zero() ); } 108 | static inline simd_type from_value( double v ) { return _mm_set1_pd( v ); } 109 | static inline simd_type from_values( double v1, double v0 ) 110 | { 111 | return _mm_set_pd( v1, v0 ); 112 | } 113 | static inline simd_type iota() { return from_values( 1, 0 ); } 114 | }; 115 | 116 | } // namespace litesimd 117 
| 118 | #endif // LITESIMD_HAS_SSE 119 | #endif // LITESIMD_ARCH_SSE_TRAITS_H 120 | -------------------------------------------------------------------------------- /include/litesimd/types.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_TYPES_H 24 | #define LITESIMD_TYPES_H 25 | 26 | #include 27 | #include 28 | 29 | namespace litesimd { 30 | 31 | /** 32 | * \defgroup type SIMD type 33 | * 34 | * This defines the basic litesimd type. 
35 | * 36 | * All these functions are accessible at `` 37 | */ 38 | 39 | /** 40 | * \ingroup type 41 | * \class simd_type 42 | * \brief Basic litesimd type 43 | * 44 | * The simd_type is transparently interoperable with SIMD intrinsics to 45 | * allow the developer to perform more complex operations than originally 46 | * anticipated by the library. 47 | * 48 | * \tparam ValueType_T Base type of original SIMD register 49 | */ 50 | template< typename ValueType_T, typename Tag_T = default_tag > 51 | class simd_type 52 | { 53 | using simd_traits = traits< ValueType_T, Tag_T >; 54 | 55 | public: 56 | /// Base type of original SIMD register 57 | using simd_value_type = ValueType_T; 58 | /// SIMD instruction TAG selector 59 | using simd_tag = Tag_T; 60 | 61 | /// Type of this class 62 | using type = simd_type< simd_value_type, simd_tag >; 63 | /// Intrinsics original type 64 | using inner_type = typename simd_traits::simd_type; 65 | /// Type of bitmasks 66 | using bitmask_type = typename simd_traits::bitmask_type; 67 | /// Type of index 68 | using index_type = int; 69 | /// How many values fit on SIMD register 70 | constexpr static size_t simd_size = sizeof(inner_type) / sizeof(simd_value_type); 71 | 72 | simd_type(){} 73 | 74 | /// Implicit constructor from intrinsics SIMD type 75 | simd_type( inner_type v ) : v_(v) {} 76 | 77 | /// Implicit cast to intrinsics SIMD type 78 | operator inner_type() const { return v_; } 79 | 80 | /** 81 | * \brief Construct a simd_type with all values equal 82 | * 83 | * \param v Value to be set 84 | * 85 | * **Example** 86 | * ```{.cpp} 87 | * #include 88 | * #include 89 | * #include 90 | * 91 | * int main() 92 | * { 93 | * namespace ls = litesimd; 94 | * 95 | * ls::t_int32_simd x( 3 ); 96 | * std::cout << "x: " << x << std::endl; 97 | * return 0; 98 | * } 99 | * ``` 100 | * Output on a SSE compilation 101 | * ``` 102 | * x: (3, 3, 3, 3) 103 | * ``` 104 | */ 105 | explicit simd_type( simd_value_type v ) : v_( simd_traits::from_value( v ) ) {} 106 |
107 | /** 108 | * \brief Construct a simd_type setting all values 109 | * 110 | * \param i, j, v Values to be set 111 | * 112 | * This constructor has a different parameter count to match how 113 | * many values fit in simd_type. 114 | * 115 | * **Example** 116 | * ```{.cpp} 117 | * #include 118 | * #include 119 | * #include 120 | * 121 | * int main() 122 | * { 123 | * namespace ls = litesimd; 124 | * 125 | * ls::t_int32_simd x( 3, 2, 1, 0 ); 126 | * std::cout << "x: " << x << std::endl; 127 | * return 0; 128 | * } 129 | * ``` 130 | * Output on a SSE compilation 131 | * ``` 132 | * x: (3, 2, 1, 0) 133 | * ``` 134 | */ 135 | template< typename... Value_T > 136 | simd_type( simd_value_type i, simd_value_type j, Value_T... v ) : 137 | v_( simd_traits::from_values( i, j, v... ) ) {} 138 | 139 | /// Returns a simd_type with all values zero 140 | static inline simd_type zero() { return simd_type( simd_traits::zero() ); } 141 | 142 | /// Returns a simd_type with all bits 1 143 | static inline simd_type ones() { return simd_type( simd_traits::ones() ); } 144 | 145 | /// Returns a simd_type with increasing values from 0 to simd_size - 1, e.g. (3, 2, 1, 0).
146 | static inline simd_type iota() { return simd_type( simd_traits::iota() ); } 147 | 148 | private: 149 | inner_type v_; 150 | }; 151 | 152 | /// Shortcut for int8_t simd_type on default instruction set 153 | using t_int8_simd = simd_type< int8_t, default_tag >; 154 | /// Shortcut for int16_t simd_type on default instruction set 155 | using t_int16_simd = simd_type< int16_t, default_tag >; 156 | /// Shortcut for int32_t simd_type on default instruction set 157 | using t_int32_simd = simd_type< int32_t, default_tag >; 158 | /// Shortcut for int64_t simd_type on default instruction set 159 | using t_int64_simd = simd_type< int64_t, default_tag >; 160 | 161 | /// Shortcut for float simd_type on default instruction set 162 | using t_float_simd = simd_type< float, default_tag >; 163 | /// Shortcut for double simd_type on default instruction set 164 | using t_double_simd = simd_type< double, default_tag >; 165 | 166 | } // namespace litesimd 167 | 168 | #endif // LITESIMD_TYPES_H 169 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/avx/traits.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_ARCH_AVX_TRAITS_H 24 | #define LITESIMD_ARCH_AVX_TRAITS_H 25 | 26 | #ifdef LITESIMD_HAS_AVX 27 | 28 | #include 29 | #include 30 | 31 | namespace litesimd { 32 | 33 | template<> struct traits< int8_t, avx_tag >{ 34 | using simd_type = __m256i; 35 | using bitmask_type = uint32_t; 36 | static inline simd_type zero() { return _mm256_setzero_si256(); } 37 | static inline simd_type ones() { return _mm256_cmpeq_epi8( zero(), zero() ); } 38 | static inline simd_type from_value( int8_t v ) { return _mm256_set1_epi8( v ); } 39 | static inline simd_type from_values( int8_t v31, int8_t v30, int8_t v29, int8_t v28, 40 | int8_t v27, int8_t v26, int8_t v25, int8_t v24, 41 | int8_t v23, int8_t v22, int8_t v21, int8_t v20, 42 | int8_t v19, int8_t v18, int8_t v17, int8_t v16, 43 | int8_t v15, int8_t v14, int8_t v13, int8_t v12, 44 | int8_t v11, int8_t v10, int8_t v9, int8_t v8, 45 | int8_t v7, int8_t v6, int8_t v5, int8_t v4, 46 | int8_t v3, int8_t v2, int8_t v1, int8_t v0 ) 47 | { 48 | return _mm256_set_epi8( v31, v30, v29, v28, v27, v26, v25, v24, 49 | v23, v22, v21, v20, v19, v18, v17, v16, 50 | v15, v14, v13, v12, v11, v10, v9, v8, 51 | v7, v6, v5, v4, v3, v2, v1, v0 ); 52 | } 53 | static inline simd_type iota() { return from_values( 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 54 | 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ); } 55 | }; 56 | 57 | template<> struct traits< int16_t, avx_tag >{ 58 | using 
simd_type = __m256i; 59 | using bitmask_type = uint32_t; 60 | static inline simd_type zero() { return _mm256_setzero_si256(); } 61 | static inline simd_type ones() { return _mm256_cmpeq_epi8( zero(), zero() ); } 62 | static inline simd_type from_value( int16_t v ) { return _mm256_set1_epi16( v ); } 63 | static inline simd_type from_values( int16_t v15, int16_t v14, int16_t v13, int16_t v12, 64 | int16_t v11, int16_t v10, int16_t v9, int16_t v8, 65 | int16_t v7, int16_t v6, int16_t v5, int16_t v4, 66 | int16_t v3, int16_t v2, int16_t v1, int16_t v0 ) 67 | { 68 | return _mm256_set_epi16( v15, v14, v13, v12, v11, v10, v9, v8, 69 | v7, v6, v5, v4, v3, v2, v1, v0 ); 70 | } 71 | static inline simd_type iota() { return from_values( 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ); } 72 | }; 73 | 74 | template<> struct traits< int32_t, avx_tag >{ 75 | using simd_type = __m256i; 76 | using bitmask_type = uint32_t; 77 | static inline simd_type zero() { return _mm256_setzero_si256(); } 78 | static inline simd_type ones() { return _mm256_cmpeq_epi8( zero(), zero() ); } 79 | static inline simd_type from_value( int32_t v ) { return _mm256_set1_epi32( v ); } 80 | static inline simd_type from_values( int32_t v7, int32_t v6, int32_t v5, int32_t v4, 81 | int32_t v3, int32_t v2, int32_t v1, int32_t v0 ) 82 | { 83 | return _mm256_set_epi32( v7, v6, v5, v4, v3, v2, v1, v0 ); 84 | } 85 | static inline simd_type iota() { return from_values( 7, 6, 5, 4, 3, 2, 1, 0 ); } 86 | }; 87 | 88 | template<> struct traits< int64_t, avx_tag >{ 89 | using simd_type = __m256i; 90 | using bitmask_type = uint32_t; 91 | static inline simd_type zero() { return _mm256_setzero_si256(); } 92 | static inline simd_type ones() { return _mm256_cmpeq_epi8( zero(), zero() ); } 93 | static inline simd_type from_value( int64_t v ) { return _mm256_set1_epi64x( v ); } 94 | static inline simd_type from_values( int64_t v3, int64_t v2, int64_t v1, int64_t v0 ) 95 | { 96 | return _mm256_set_epi64x( v3, v2, v1, v0 ); 
97 | } 98 | static inline simd_type iota() { return from_values( 3, 2, 1, 0 ); } 99 | }; 100 | 101 | template<> struct traits< float, avx_tag >{ // AVX traits for float: 8 lanes in a __m256 102 | using simd_type = __m256; 103 | using bitmask_type = uint32_t; 104 | static inline simd_type zero() { return _mm256_setzero_ps(); } 105 | static inline simd_type ones() { return _mm256_cmp_ps( zero(), zero(), _CMP_EQ_OQ ); } // 0 == 0 holds in every lane, so every bit is set (0 > 0 would yield all zeros) 106 | static inline simd_type from_value( float v ) { return _mm256_set1_ps( v ); } 107 | static inline simd_type from_values( float v7, float v6, float v5, float v4, 108 | float v3, float v2, float v1, float v0 ) 109 | { 110 | return _mm256_set_ps( v7, v6, v5, v4, v3, v2, v1, v0 ); 111 | } 112 | static inline simd_type iota() { return from_values( 7, 6, 5, 4, 3, 2, 1, 0 ); } 113 | }; 114 | 115 | template<> struct traits< double, avx_tag >{ // AVX traits for double: 4 lanes in a __m256d 116 | using simd_type = __m256d; 117 | using bitmask_type = uint32_t; 118 | static inline simd_type zero() { return _mm256_setzero_pd(); } 119 | static inline simd_type ones() { return _mm256_cmp_pd( zero(), zero(), _CMP_EQ_OQ ); } // 0 == 0 holds in every lane, so every bit is set (0 > 0 would yield all zeros) 120 | static inline simd_type from_value( double v ) { return _mm256_set1_pd( v ); } 121 | static inline simd_type from_values( double v3, double v2, double v1, double v0 ) 122 | { 123 | return _mm256_set_pd( v3, v2, v1, v0 ); 124 | } 125 | static inline simd_type iota() { return from_values( 3, 2, 1, 0 ); } 126 | }; 127 | 128 | } // namespace litesimd 129 | 130 | #endif // LITESIMD_HAS_AVX 131 | #endif // LITESIMD_ARCH_AVX_TRAITS_H 132 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/sse/compare.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, 
including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_ARCH_SSE_COMPARE_H 24 | #define LITESIMD_ARCH_SSE_COMPARE_H 25 | 26 | #ifdef LITESIMD_HAS_SSE 27 | 28 | #include 29 | #include 30 | #include 31 | 32 | namespace litesimd { 33 | 34 | 35 | // Bit scan: yields (bit index, true) for a non-zero bitmask, or (-1, false) when bitmask is zero 36 | // --------------------------------------------------------------------------------------- 37 | template<> inline std::pair 38 | bit_scan_forward< sse_tag >( uint32_t bitmask ) 39 | { 40 | #ifdef _WIN32 41 | unsigned long index; 42 | return (0 == _BitScanForward( &index, bitmask )) 43 | ? std::make_pair( -1, false ) 44 | : std::make_pair( static_cast<int>( index ), true ); // cast so both arms have the same pair type 45 | #else 46 | return (bitmask == 0) 47 | ? std::make_pair( -1, false ) 48 | : std::make_pair( _bit_scan_forward( bitmask ), true ); 49 | #endif 50 | } 51 | 52 | template<> inline std::pair 53 | bit_scan_reverse< sse_tag >( uint32_t bitmask ) 54 | { 55 | #ifdef _WIN32 56 | unsigned long index; 57 | return (0 == _BitScanReverse( &index, bitmask )) 58 | ? std::make_pair( -1, false ) 59 | : std::make_pair( static_cast<int>( index ), true ); // cast so both arms have the same pair type 60 | #else 61 | return (bitmask == 0) 62 | ? 
std::make_pair( -1, false ) 63 | : std::make_pair( _bit_scan_reverse( bitmask ), true ); 64 | #endif 65 | } 66 | 67 | // Mask to bitmask 68 | // --------------------------------------------------------------------------------------- 69 | #define DEF_MASK_TO_BITMASK( TYPE_T, CMD ) \ 70 | template<> inline typename simd_type< TYPE_T, sse_tag >::bitmask_type \ 71 | mask_to_bitmask< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > mask ) { \ 72 | return CMD( mask ); \ 73 | } 74 | DEF_MASK_TO_BITMASK( int8_t, _mm_movemask_epi8 ) 75 | DEF_MASK_TO_BITMASK( int16_t, _mm_movemask_epi8 ) 76 | DEF_MASK_TO_BITMASK( int32_t, _mm_movemask_epi8 ) 77 | DEF_MASK_TO_BITMASK( int64_t, _mm_movemask_epi8 ) 78 | DEF_MASK_TO_BITMASK( float, _mm_movemask_ps ) 79 | DEF_MASK_TO_BITMASK( double, _mm_movemask_pd ) 80 | #undef DEF_MASK_TO_BITMASK 81 | 82 | // Greater than 83 | // --------------------------------------------------------------------------------------- 84 | #define DEF_GREATER_THAN( TYPE_T, CMD ) \ 85 | template<> inline simd_type< TYPE_T, sse_tag > \ 86 | greater< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \ 87 | simd_type< TYPE_T, sse_tag > rhs ) { \ 88 | return CMD( lhs, rhs ); \ 89 | } 90 | 91 | DEF_GREATER_THAN( int8_t, _mm_cmpgt_epi8 ) 92 | DEF_GREATER_THAN( int16_t, _mm_cmpgt_epi16 ) 93 | DEF_GREATER_THAN( int32_t, _mm_cmpgt_epi32 ) 94 | DEF_GREATER_THAN( int64_t, _mm_cmpgt_epi64 ) 95 | DEF_GREATER_THAN( float, _mm_cmpgt_ps ) 96 | DEF_GREATER_THAN( double, _mm_cmpgt_pd ) 97 | #undef DEF_GREATER_THAN 98 | 99 | // Equals 100 | // --------------------------------------------------------------------------------------- 101 | #define DEF_EQUALS( TYPE_T, CMD ) \ 102 | template<> inline simd_type< TYPE_T, sse_tag > \ 103 | equal_to< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \ 104 | simd_type< TYPE_T, sse_tag > rhs ) { \ 105 | return CMD( lhs, rhs ); \ 106 | } 107 | 108 | DEF_EQUALS( int8_t, _mm_cmpeq_epi8 ) 109 | DEF_EQUALS( int16_t, _mm_cmpeq_epi16 ) 110 | 
DEF_EQUALS( int32_t, _mm_cmpeq_epi32 ) 111 | DEF_EQUALS( int64_t, _mm_cmpeq_epi64 ) 112 | DEF_EQUALS( float, _mm_cmpeq_ps ) 113 | DEF_EQUALS( double, _mm_cmpeq_pd ) 114 | #undef DEF_EQUALS 115 | 116 | // all_of 117 | // --------------------------------------------------------------------------------------- 118 | template< typename ValueType_T > 119 | struct all_of_bitmask_op< ValueType_T, 120 | typename std::enable_if::value, sse_tag>::type > 121 | { 122 | inline bool operator()( typename simd_type< ValueType_T, sse_tag >::bitmask_type bitmask ) 123 | { 124 | return (bitmask == 0xffff); 125 | } 126 | }; 127 | 128 | template< typename ValueType_T > 129 | struct all_of_bitmask_op< ValueType_T, 130 | typename std::enable_if::value, sse_tag>::type > 131 | { 132 | inline bool operator()( typename simd_type< ValueType_T, sse_tag >::bitmask_type bitmask ) 133 | { 134 | constexpr static typename simd_type< ValueType_T, sse_tag >::bitmask_type true_mask = 135 | (1 << simd_type< ValueType_T, sse_tag >::simd_size) -1; 136 | return (bitmask == true_mask); 137 | } 138 | }; 139 | 140 | template< typename ValueType_T > 141 | struct all_of_op< ValueType_T, 142 | typename std::enable_if::value, sse_tag>::type > 143 | { 144 | inline bool operator()( simd_type< ValueType_T, sse_tag > mask ) 145 | { 146 | return !!_mm_test_all_ones( mask ); 147 | } 148 | }; 149 | 150 | template< typename ValueType_T > 151 | struct all_of_op< ValueType_T, 152 | typename std::enable_if::value, sse_tag>::type > 153 | { 154 | inline bool operator()( simd_type< ValueType_T, sse_tag > mask ) 155 | { 156 | return all_of_bitmask_op< ValueType_T, sse_tag >()( mask_to_bitmask< ValueType_T, sse_tag >( mask ) ); 157 | } 158 | }; 159 | 160 | // none_of 161 | // --------------------------------------------------------------------------------------- 162 | template< typename ValueType_T > 163 | struct none_of_op< ValueType_T, 164 | typename std::enable_if::value, sse_tag>::type > 165 | { 166 | inline bool operator()( 
simd_type< ValueType_T, sse_tag > mask ) 167 | { 168 | return !!_mm_testz_si128( mask, simd_type< ValueType_T, sse_tag >::ones() ); 169 | } 170 | }; 171 | 172 | template< typename ValueType_T > 173 | struct none_of_op< ValueType_T, 174 | typename std::enable_if::value, sse_tag>::type > 175 | { 176 | inline bool operator()( simd_type< ValueType_T, sse_tag > mask ) 177 | { 178 | return (0 == mask_to_bitmask< ValueType_T, sse_tag >( mask ) ); 179 | } 180 | }; 181 | 182 | } // namespace litesimd 183 | 184 | #endif // LITESIMD_HAS_SSE 185 | #endif // LITESIMD_ARCH_SSE_COMPARE_H 186 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/andrelrt/litesimd.svg?branch=master)](https://travis-ci.org/andrelrt/litesimd) [![Build status](https://ci.appveyor.com/api/projects/status/t3fmylykanoma9ja/branch/master?svg=true)](https://ci.appveyor.com/project/andrelrt/litesimd/branch/master) [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT) [![Documentation](https://codedocs.xyz/andrelrt/litesimd.svg)](https://codedocs.xyz/andrelrt/litesimd/) 2 | 3 | Litesimd is a no overhead, header only, C++ library for SIMD processing. This library goal is to provide tools for developers to incorporate SIMD processing in all kinds of algorithms not only for calculations. To achieve this goal, some design principles are observed. 4 | 5 | ## Design principles 6 | 7 | #### SIMD for all kind of algorithms 8 | 9 | Typically SIMD is used for mathematical algorithms, such as linear algebra, FFT or imaging processing. However, the processor has several SIMD instructions for all purposes. Litesimd library has an emphasis on SIMD comparison and data shuffle instead of math operations. 
10 | 11 | #### Do not conceal the SIMD complexity 12 | 13 | By design, the library does not attempt to hide the complexity of using SIMD. When using any SIMD library, the developer needs to understand what is happening anyway. So why try to hide this complexity from them? Some C++ operators are (or will be) overloaded for convenience, but this is not the library's focus. 14 | 15 | #### Intrinsics interoperability 16 | 17 | Any SIMD library typically covers a smaller scope than the total set of processor SIMD instructions. The litesimd library must be transparently interoperable with SIMD intrinsics, allowing the developer to perform more complex operations than originally anticipated by the library. 18 | 19 | #### No memory access 20 | 21 | Processor instructions are much faster than memory access. Although memory access is unavoidable, the litesimd library prefers processor instructions to manipulate or enumerate values within the SIMD registers to minimize memory use. 22 | 23 | ## Example 24 | 25 | ```cpp 26 | // Compiled with 27 | // 28 | // g++ -std=c++11 -O3 -mavx2 -I <path/to/litesimd>/include greater.cpp -o greater 29 | 30 | #include <iostream> 31 | #include <litesimd/compare.h> 32 | #include <litesimd/helpers/iostream.h> 33 | 34 | int main() 35 | { 36 | namespace ls = litesimd; 37 | 38 | // int32_t vector in default instruction set (AVX) 39 | ls::t_int32_simd cmp; 40 | 41 | // litesimd types are intrinsics compatible 42 | cmp = _mm256_set_epi32( 80, 70, 60, 50, 40, 30, 20, 10 ); 43 | 44 | int32_t val = 5; 45 | 46 | // int32_simd_size is how many int32_t values fit in t_int32_simd (8) 47 | for( size_t i = 0; i <= ls::t_int32_simd::simd_size; ++i ) 48 | { 49 | // Compare 'val' against all 'cmp' values 50 | uint32_t bitmask = ls::greater_bitmask( val, cmp ); 51 | 52 | // As 'cmp' is sorted, we can use the bitmask to find the 53 | // last item that 'val' is greater than 54 | // 55 | // Returns values between [-1, ls::int32_simd_size) 56 | int index = ls::bitmask_last_index< int32_t >( bitmask ); 57 | 58 | // greater_last_index could be called instead 59 | // greater_bitmask 
+ bitmask_last_index 60 | // 61 | // int index = ls::greater_last_index( val, cmp ); 62 | 63 | if( index < 0 ) 64 | { 65 | std::cout << "The value " << val 66 | << " is less than all values of " << cmp 67 | << std::endl; 68 | } 69 | else if( index == ls::int32_simd_size -1 ) 70 | { 71 | std::cout << "The value " << val 72 | << " is greater than all values of " << cmp 73 | << std::endl; 74 | } 75 | else 76 | { 77 | std::cout << "The value " << val 78 | << " is between items " << index 79 | << " and " << index + 1 80 | << " of " << cmp 81 | << std::endl; 82 | } 83 | 84 | val += 10; 85 | } 86 | return 0; 87 | } 88 | ``` 89 | This will produce the following output: 90 | 91 | ``` 92 | $ ./greater 93 | The value 5 is less than all values of (80, 70, 60, 50, 40, 30, 20, 10) 94 | The value 15 is between items 0 and 1 of (80, 70, 60, 50, 40, 30, 20, 10) 95 | The value 25 is between items 1 and 2 of (80, 70, 60, 50, 40, 30, 20, 10) 96 | The value 35 is between items 2 and 3 of (80, 70, 60, 50, 40, 30, 20, 10) 97 | The value 45 is between items 3 and 4 of (80, 70, 60, 50, 40, 30, 20, 10) 98 | The value 55 is between items 4 and 5 of (80, 70, 60, 50, 40, 30, 20, 10) 99 | The value 65 is between items 5 and 6 of (80, 70, 60, 50, 40, 30, 20, 10) 100 | The value 75 is between items 6 and 7 of (80, 70, 60, 50, 40, 30, 20, 10) 101 | The value 85 is greater than all values of (80, 70, 60, 50, 40, 30, 20, 10) 102 | ``` 103 | 104 | ## Directory structure 105 | 106 | ``` 107 | litesimd/ 108 | doc/ ; Doxygen project 109 | include/litesimd/ 110 | algorithm/ 111 | for_each.h ; for_each item of simd_type, also for_each index of bitmask 112 | iota.h ; Fill vector with [0, simd_size), e.g. (3, 2, 1, 0) 113 | minmax.h ; Min and max functions 114 | detail/ ; Internal functions, classes and architecture dependent code. 
Should not be included directly 115 | helpers/ 116 | containers.h ; Aligned std containers, depends on boost::align 117 | iostream.h ; operator<< overload for litesimd types 118 | algorithm.h ; Includes all algorithms 119 | arithmetic.h ; add, sub, mul, mullo, mulhi, div functions 120 | bitwise.h ; bit_and, bit_or, bit_xor and bit_not functions 121 | compare.h ; greater, equal_to, mask_to_bitmask, bitmask_to_high/low_index 122 | intravector.h ; generic horizontal reduction 123 | shuffle.h ; high/low_insert, blend, get/set<> 124 | types.h ; simd_type 125 | samples/ 126 | binary_search/ ; Benchmark lower_bound implementations 127 | boyer_moore_horspool/ ; Substring search using SIMD (WIP: still slower than boost, but faster than std::string::find) 128 | bubble_sort/ ; Classic bubble sort in SIMD style 129 | greater/ ; Simple greater than sample (the same as above) 130 | nway_tree/ ; Another approach for same lower_bound search, using trees 131 | to_lower/ ; ASCII to_lower benchmark 132 | test/ ; Unit tests 133 | ``` 134 | 135 | ## Building 136 | 137 | As a header only library, the building process is only for samples, tests and documentation. 138 | 139 | ``` 140 | $ mkdir build 141 | $ cd build 142 | $ cmake -DCMAKE_BUILD_TYPE=Release .. 143 | $ make 144 | $ make install 145 | ``` 146 | 147 | Boost libraries are required to build the samples. 
148 | 149 | ## Support 150 | 151 | Litesimd is tested on the following environments: 152 | 153 | - Linux 154 | - GCC: 4.8, 4.9, 5, 6, 7 155 | - clang: 4, 5, 6 156 | - MacOSX 157 | - XCode: 8.3, 9, 9.1, 9.2, 9.3, 9.4 158 | - Windows (WIP) 159 | - MSVC: VS 2015, VS 2017 160 | 161 | Litesimd supports the following instruction sets: 162 | 163 | - x86 164 | - SSE4.2, AVX2 165 | -------------------------------------------------------------------------------- /include/litesimd/shuffle.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 
22 | 23 | #ifndef LITESIMD_SHUFFLE_H 24 | #define LITESIMD_SHUFFLE_H 25 | 26 | #include 27 | #include 28 | #include 29 | 30 | namespace litesimd { 31 | 32 | /** 33 | * \defgroup shuffle Register manipulation 34 | * 35 | * In litesimd, the shuffle group has functions to manipulate the SIMD register. 36 | * 37 | * All these functions are accessible at `<litesimd/shuffle.h>` 38 | */ 39 | 40 | /** 41 | * \ingroup shuffle 42 | * \brief Shift all values one index lower inside the SIMD register and 43 | * insert a value on the highest index. 44 | * 45 | * \param vec SIMD register 46 | * \param val Value to be inserted 47 | * \tparam SimdType_T Type of the SIMD register (provides simd_value_type and simd_tag) 48 | * \returns SIMD register with the highest index modified 49 | * 50 | * **Example** 51 | * ```{.cpp} 52 | * #include 53 | * #include 54 | * #include 55 | * #include 56 | * 57 | * int main() 58 | * { 59 | * namespace ls = litesimd; 60 | * 61 | * ls::t_int32_simd x( 3, 2, 1, 0 ); 62 | * std::cout << "high_insert( x, 9 ): " 63 | * << ls::high_insert( x, 9 ) << std::endl; 64 | * return 0; 65 | * } 66 | * ``` 67 | * Output on a SSE compilation 68 | * ``` 69 | * high_insert( x, 9 ): (9, 3, 2, 1) 70 | * ``` 71 | */ 72 | template< typename SimdType_T, typename SimdType_T::simd_value_type* = nullptr > 73 | inline SimdType_T 74 | high_insert( SimdType_T vec, typename SimdType_T::simd_value_type val ) 75 | { 76 | return high_insert< typename SimdType_T::simd_value_type, 77 | typename SimdType_T::simd_tag >( vec, val ); 78 | } 79 | 80 | /** 81 | * \ingroup shuffle 82 | * \brief Shift all values one index higher inside the SIMD register and 83 | * insert a value on the lowest index. 
84 | * 85 | * \param vec SIMD register 86 | * \param val Value to be inserted 87 | * \tparam SimdType_T Type of the SIMD register (provides simd_value_type and simd_tag) 88 | * \returns SIMD register with the lowest index modified 89 | * 90 | * **Example** 91 | * ```{.cpp} 92 | * #include 93 | * #include 94 | * #include 95 | * #include 96 | * 97 | * int main() 98 | * { 99 | * namespace ls = litesimd; 100 | * 101 | * ls::t_int32_simd x( 3, 2, 1, 0 ); 102 | * std::cout << "low_insert( x, 9 ): " 103 | * << ls::low_insert( x, 9 ) << std::endl; 104 | * return 0; 105 | * } 106 | * ``` 107 | * Output on a SSE compilation 108 | * ``` 109 | * low_insert( x, 9 ): (2, 1, 0, 9) 110 | * ``` 111 | */ 112 | template< typename SimdType_T, typename SimdType_T::simd_value_type* = nullptr > 113 | inline SimdType_T 114 | low_insert( SimdType_T vec, typename SimdType_T::simd_value_type val ) 115 | { 116 | return low_insert< typename SimdType_T::simd_value_type, 117 | typename SimdType_T::simd_tag >( vec, val ); 118 | } 119 | 120 | // Blend helper overloads: wrap a scalar trueVal/falseVal in a simd_type and forward to blend (documented in arch/common) 121 | // --------------------------------------------------------------------------------------- 122 | template< typename ValueType_T, typename Tag_T = default_tag > 123 | inline simd_type< ValueType_T, Tag_T > 124 | blend( simd_type< ValueType_T, Tag_T > mask, 125 | ValueType_T trueVal, 126 | simd_type< ValueType_T, Tag_T > falseVal ) 127 | { 128 | return blend< ValueType_T, Tag_T >( mask, 129 | simd_type< ValueType_T, Tag_T >( trueVal ), 130 | falseVal ); 131 | } 132 | 133 | template< typename ValueType_T, typename Tag_T = default_tag > 134 | inline simd_type< ValueType_T, Tag_T > 135 | blend( simd_type< ValueType_T, Tag_T > mask, 136 | simd_type< ValueType_T, Tag_T > trueVal, 137 | ValueType_T falseVal ) 138 | { 139 | return blend< ValueType_T, Tag_T >( mask, 140 | trueVal, 141 | simd_type< ValueType_T, Tag_T >( falseVal ) ); 142 | } 143 | 144 | /** 145 | * \ingroup shuffle 146 | * \brief Extract one value from SIMD register. 
147 | * 148 | * The `index` template parameter is verified with `static_assert` and the compilation will 149 | * fail with `"Index out of bounds"` error on invalid values. 150 | * 151 | * \param vec SIMD register 152 | * \tparam index Index of the value inside the SIMD register 153 | * \tparam ValueType_T Base type of original SIMD register 154 | * \returns The value extracted 155 | * 156 | * **Example** 157 | * ```{.cpp} 158 | * #include 159 | * #include 160 | * #include 161 | * 162 | * int main() 163 | * { 164 | * namespace ls = litesimd; 165 | * 166 | * ls::t_int32_simd x( 9, 7, 5, 2 ); 167 | * std::cout << "get<2>( x ): " 168 | * << ls::get<2>( x ) << std::endl; 169 | * return 0; 170 | * } 171 | * ``` 172 | * Output on a SSE compilation 173 | * ``` 174 | * get<2>( x ): 7 175 | * ``` 176 | */ 177 | template< int index, typename ValueType_T, typename Tag_T = default_tag > 178 | inline ValueType_T 179 | get( simd_type< ValueType_T, Tag_T > vec ) 180 | { 181 | static_assert( 0 <= index && index < simd_type< ValueType_T, Tag_T >::simd_size, 182 | "Index out of bounds" ); 183 | 184 | return get_functor< index, ValueType_T, Tag_T >()( vec ); 185 | } 186 | 187 | /** 188 | * \ingroup shuffle 189 | * \brief Set one value inside the SIMD register. 190 | * 191 | * The `index` template parameter is verified with `static_assert` and the compilation will 192 | * fail with `"Index out of bounds"` error on invalid values. 
193 | * 194 | * \param vec SIMD register 195 | * \param val The value to be set 196 | * \tparam index Index of the value inside the SIMD register 197 | * \tparam ValueType_T Base type of original SIMD register 198 | * \returns SIMD register with the value at `index` replaced by `val` 199 | * 200 | * **Example** 201 | * ```{.cpp} 202 | * #include 203 | * #include 204 | * #include 205 | * #include 206 | * 207 | * int main() 208 | * { 209 | * namespace ls = litesimd; 210 | * 211 | * ls::t_int32_simd x( 9, 7, 5, 2 ); 212 | * std::cout << "set<2>( x, 11 ): " 213 | * << ls::set<2>( x, 11 ) << std::endl; 214 | * return 0; 215 | * } 216 | * ``` 217 | * Output on a SSE compilation 218 | * ``` 219 | * set<2>( x, 11 ): (9, 11, 5, 2) 220 | * ``` 221 | */ 222 | template< int index, typename ValueType_T, typename Tag_T = default_tag > 223 | inline simd_type< ValueType_T, Tag_T > 224 | set( simd_type< ValueType_T, Tag_T > vec, ValueType_T val ) 225 | { 226 | static_assert( 0 <= index && index < simd_type< ValueType_T, Tag_T >::simd_size, 227 | "Index out of bounds" ); 228 | 229 | return set_functor< index, ValueType_T, Tag_T >()( vec, val ); 230 | } 231 | 232 | } // namespace litesimd 233 | 234 | #endif // LITESIMD_SHUFFLE_H 235 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/avx/compare.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice 
and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #ifndef LITESIMD_ARCH_AVX_COMPARE_H 24 | #define LITESIMD_ARCH_AVX_COMPARE_H 25 | 26 | #ifdef LITESIMD_HAS_AVX 27 | 28 | #include 29 | #include 30 | #include 31 | #include 32 | 33 | namespace litesimd { 34 | 35 | // Bit scan 36 | // --------------------------------------------------------------------------------------- 37 | template<> inline std::pair 38 | bit_scan_forward< avx_tag >( uint32_t bitmask ) 39 | { 40 | return bit_scan_forward< sse_tag >( bitmask ); 41 | } 42 | 43 | template<> inline std::pair 44 | bit_scan_reverse< avx_tag >( uint32_t bitmask ) 45 | { 46 | return bit_scan_reverse< sse_tag >( bitmask ); 47 | } 48 | 49 | // Mask to bitmask 50 | // --------------------------------------------------------------------------------------- 51 | #define DEF_MASK_TO_BITMASK( TYPE_T, CMD ) \ 52 | template<> inline typename simd_type< TYPE_T, avx_tag >::bitmask_type \ 53 | mask_to_bitmask< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > mask ) { \ 54 | return CMD( mask ); \ 55 | } 56 | 57 | DEF_MASK_TO_BITMASK( int8_t, _mm256_movemask_epi8 ) 58 | DEF_MASK_TO_BITMASK( int16_t, _mm256_movemask_epi8 ) 59 | DEF_MASK_TO_BITMASK( int32_t, _mm256_movemask_epi8 ) 60 | DEF_MASK_TO_BITMASK( int64_t, _mm256_movemask_epi8 ) 61 | DEF_MASK_TO_BITMASK( float, _mm256_movemask_ps ) 62 | DEF_MASK_TO_BITMASK( double, _mm256_movemask_pd ) 63 | 64 | 
#undef DEF_MASK_TO_BITMASK 65 | 66 | // Greater than 67 | // --------------------------------------------------------------------------------------- 68 | #define DEF_GREATER_THAN( TYPE_T, CMD ) \ 69 | template<> inline simd_type< TYPE_T, avx_tag > \ 70 | greater< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \ 71 | simd_type< TYPE_T, avx_tag > rhs ) { \ 72 | return CMD( lhs, rhs ); \ 73 | } 74 | 75 | DEF_GREATER_THAN( int8_t, _mm256_cmpgt_epi8 ) 76 | DEF_GREATER_THAN( int16_t, _mm256_cmpgt_epi16 ) 77 | DEF_GREATER_THAN( int32_t, _mm256_cmpgt_epi32 ) 78 | DEF_GREATER_THAN( int64_t, _mm256_cmpgt_epi64 ) 79 | 80 | #undef DEF_GREATER_THAN 81 | 82 | template<> inline simd_type< float, avx_tag > 83 | greater< float, avx_tag >( simd_type< float, avx_tag > lhs, 84 | simd_type< float, avx_tag > rhs ) 85 | { 86 | // Quietly ignore NaN 87 | return _mm256_cmp_ps( lhs, rhs, _CMP_GT_OQ ); 88 | } 89 | 90 | template<> inline simd_type< double, avx_tag > 91 | greater< double, avx_tag >( simd_type< double, avx_tag > lhs, 92 | simd_type< double, avx_tag > rhs ) 93 | { 94 | // Quietly ignore NaN 95 | return _mm256_cmp_pd( lhs, rhs, _CMP_GT_OQ ); 96 | } 97 | 98 | // Equals 99 | // --------------------------------------------------------------------------------------- 100 | #define DEF_EQUALS( TYPE_T, CMD ) \ 101 | template<> inline simd_type< TYPE_T, avx_tag > \ 102 | equal_to< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \ 103 | simd_type< TYPE_T, avx_tag > rhs ) { \ 104 | return CMD( lhs, rhs ); \ 105 | } 106 | 107 | DEF_EQUALS( int8_t, _mm256_cmpeq_epi8 ) 108 | DEF_EQUALS( int16_t, _mm256_cmpeq_epi16 ) 109 | DEF_EQUALS( int32_t, _mm256_cmpeq_epi32 ) 110 | DEF_EQUALS( int64_t, _mm256_cmpeq_epi64 ) 111 | 112 | #undef DEF_EQUALS 113 | 114 | template<> inline simd_type< float, avx_tag > 115 | equal_to< float, avx_tag >( simd_type< float, avx_tag > lhs, 116 | simd_type< float, avx_tag > rhs ) 117 | { 118 | // Quietly ignore NaN 119 | return _mm256_cmp_ps( lhs, rhs, 
_CMP_EQ_OQ ); 120 | } 121 | 122 | template<> inline simd_type< double, avx_tag > 123 | equal_to< double, avx_tag >( simd_type< double, avx_tag > lhs, 124 | simd_type< double, avx_tag > rhs ) 125 | { 126 | // Quietly ignore NaN 127 | return _mm256_cmp_pd( lhs, rhs, _CMP_EQ_OQ ); 128 | } 129 | 130 | // none_of 131 | // --------------------------------------------------------------------------------------- 132 | template< typename ValueType_T > 133 | struct none_of_op< ValueType_T, 134 | typename std::enable_if::value, avx_tag>::type > 135 | { 136 | inline bool operator()( simd_type< ValueType_T, avx_tag > mask ) 137 | { 138 | return !!_mm256_testz_si256( mask, simd_type< ValueType_T, avx_tag >::ones() ); 139 | } 140 | }; 141 | 142 | template<> struct none_of_op< float, avx_tag > 143 | { 144 | inline bool operator()( simd_type< float, avx_tag > mask ) 145 | { 146 | __m256i imask = reinterpret_cast<__m256i>( static_cast<__m256>( mask ) ); 147 | return none_of_op< int32_t, avx_tag >()( imask ); 148 | } 149 | }; 150 | 151 | template<> struct none_of_op< double, avx_tag > 152 | { 153 | inline bool operator()( simd_type< double, avx_tag > mask ) 154 | { 155 | __m256i imask = reinterpret_cast<__m256i>( static_cast<__m256d>( mask ) ); 156 | return none_of_op< int64_t, avx_tag >()( imask ); 157 | } 158 | }; 159 | 160 | // all_of 161 | // --------------------------------------------------------------------------------------- 162 | template< typename ValueType_T > 163 | struct all_of_op< ValueType_T, 164 | typename std::enable_if::value, avx_tag>::type > 165 | { 166 | inline bool operator()( simd_type< ValueType_T, avx_tag > mask ) 167 | { 168 | return none_of_op< ValueType_T, avx_tag >()( bit_not( mask ) ); 169 | } 170 | }; 171 | 172 | template<> struct all_of_op< float, avx_tag > 173 | { 174 | inline bool operator()( simd_type< float, avx_tag > mask ) 175 | { 176 | __m256i imask = reinterpret_cast<__m256i>( static_cast<__m256>( mask ) ); 177 | return all_of_op< int32_t, 
avx_tag >()( imask ); 178 | } 179 | }; 180 | 181 | template<> struct all_of_op< double, avx_tag > 182 | { 183 | inline bool operator()( simd_type< double, avx_tag > mask ) 184 | { 185 | __m256i imask = reinterpret_cast<__m256i>( static_cast<__m256d>( mask ) ); 186 | return all_of_op< int64_t, avx_tag >()( imask ); 187 | } 188 | }; 189 | 190 | template< typename ValueType_T > 191 | struct all_of_bitmask_op< ValueType_T, 192 | typename std::enable_if::value, avx_tag>::type > 193 | { 194 | inline bool operator()( typename simd_type< ValueType_T, avx_tag >::bitmask_type bitmask ) 195 | { 196 | return (bitmask == 0xffffffff); 197 | } 198 | }; 199 | 200 | template< typename ValueType_T > 201 | struct all_of_bitmask_op< ValueType_T, 202 | typename std::enable_if::value, avx_tag>::type > 203 | { 204 | inline bool operator()( typename simd_type< ValueType_T, avx_tag >::bitmask_type bitmask ) 205 | { 206 | constexpr static typename simd_type< ValueType_T, avx_tag >::bitmask_type true_mask = 207 | (1 << simd_type< ValueType_T, avx_tag >::simd_size) -1; 208 | return (bitmask == true_mask); 209 | } 210 | }; 211 | 212 | } // namespace litesimd 213 | 214 | #endif // LITESIMD_HAS_AVX 215 | #endif // LITESIMD_ARCH_AVX_COMPARE_H 216 | -------------------------------------------------------------------------------- /samples/nway_tree/nway_tree.cpp: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // 
The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include 31 | #include 32 | #include 33 | 34 | bool g_verbose = true; 35 | namespace ls = litesimd; 36 | 37 | template< class Cont_T, typename TAG_T > 38 | struct container_only 39 | { 40 | using container_type = Cont_T; 41 | using value_type = typename container_type::value_type; 42 | using const_iterator = typename container_type::const_iterator; 43 | 44 | container_only( const container_type& ref ) : ref_( ref ){} 45 | 46 | void build_index(){} 47 | 48 | const_iterator find( const value_type& key ) 49 | { 50 | auto first = std::lower_bound( ref_.begin(), ref_.end(), key ); 51 | return (first!=ref_.end() && !(key<*first)) ? 
first : ref_.end(); 52 | } 53 | private: 54 | const container_type& ref_; 55 | }; 56 | 57 | template< class Cont_T, typename TAG_T > 58 | struct map_index 59 | { 60 | using container_type = Cont_T; 61 | using value_type = typename container_type::value_type; 62 | using const_iterator = typename container_type::const_iterator; 63 | 64 | map_index( const container_type& ref ) : ref_( ref ){} 65 | 66 | void build_index() 67 | { 68 | for( auto it = ref_.begin(); it != ref_.end(); ++it ) 69 | { 70 | index_[ *it ] = it; 71 | } 72 | } 73 | 74 | const_iterator find( const value_type& key ) 75 | { 76 | auto it = index_.find( key ); 77 | return (it != index_.end()) ? it->second : ref_.end() ; 78 | } 79 | 80 | private: 81 | const container_type& ref_; 82 | std::map< value_type, const_iterator > index_; 83 | }; 84 | 85 | template< class Cont_T, typename TAG_T > 86 | class nway_tree 87 | { 88 | public: 89 | using container_type = Cont_T; 90 | using value_type = typename container_type::value_type; 91 | using const_iterator = typename container_type::const_iterator; 92 | 93 | nway_tree( const container_type& ref ) 94 | : ref_( ref ){} 95 | 96 | void build_index() 97 | { 98 | build_index( ref_ ); 99 | } 100 | 101 | const_iterator find( const value_type& key ) const 102 | { 103 | size_t idx = 0; 104 | for( auto&& level : tree_ ) 105 | { 106 | int li = ls::greater_last_index< value_type, TAG_T >( key, *level.get_simd( idx ) ); 107 | idx = idx * array_size + li + 1; 108 | } 109 | 110 | const simd_type* cmp = reinterpret_cast< const simd_type* >( &ref_[ idx * array_size ] ); 111 | int off = ls::equal_to_last_index< value_type, TAG_T >( key, *cmp ); 112 | 113 | if( off < 0 ) 114 | { 115 | return ref_.end(); 116 | } 117 | auto it = ref_.begin(); 118 | std::advance( it, idx * array_size + off ); 119 | return it; 120 | } 121 | 122 | private: 123 | constexpr static size_t array_size = ls::simd_type< value_type, TAG_T >::simd_size; 124 | using simd_type = ls::simd_type< value_type, TAG_T 
>; 125 | 126 | struct tree_level 127 | { 128 | ls::vector< value_type > keys_; 129 | 130 | const simd_type* get_simd( size_t idx ) const 131 | { 132 | return reinterpret_cast< const simd_type* >( &keys_[ idx * array_size ] ); 133 | } 134 | 135 | void adjust() 136 | { 137 | size_t size = keys_.size() / array_size; 138 | if( keys_.size() > size * array_size ) 139 | ++size; 140 | size *= array_size; 141 | keys_.resize( size, std::numeric_limits< value_type >::max() ); 142 | } 143 | }; 144 | 145 | ls::vector< tree_level > tree_; 146 | const container_type& ref_; 147 | 148 | void build_index( const container_type& cont ) 149 | { 150 | if( cont.size() <= array_size ) 151 | return; 152 | 153 | tree_level level; 154 | for( size_t i = array_size-1; i < cont.size(); i += array_size ) 155 | { 156 | level.keys_.push_back( cont[ i ] ); 157 | } 158 | 159 | build_index( level.keys_ ); 160 | 161 | level.adjust(); 162 | tree_.emplace_back( std::move( level ) ); 163 | return; 164 | } 165 | }; 166 | 167 | void do_nothing( int32_t ); 168 | 169 | template< class Cont_T, template < typename... 
> class Index_T, typename TAG_T > 170 | uint64_t bench( const std::string& name, size_t size, size_t loop ) 171 | { 172 | using container_type = Cont_T; 173 | using index_type = Index_T< container_type, TAG_T >; 174 | 175 | boost::timer::cpu_timer timer; 176 | container_type org; 177 | 178 | srand( 1 ); 179 | std::generate_n( std::back_inserter(org), size, &rand ); 180 | container_type sorted( org ); 181 | std::sort( sorted.begin(), sorted.end() ); 182 | index_type index( sorted ); 183 | 184 | index.build_index(); 185 | 186 | timer.start(); 187 | for( size_t j = 0; j < loop; ++j ) 188 | { 189 | for( auto i : org ) 190 | { 191 | auto ret = index.find( i ); 192 | do_nothing( *ret ); 193 | } 194 | } 195 | timer.stop(); 196 | if( g_verbose ) 197 | std::cout << "Find all " << name << ": " << timer.format(); 198 | 199 | return timer.elapsed().wall; 200 | } 201 | 202 | int main(int argc, char* /*argv*/[]) 203 | { 204 | constexpr size_t runSize = 0x00400000; 205 | constexpr size_t loop = 10; 206 | if( argc > 1 ) 207 | { 208 | g_verbose = false; 209 | std::cout << "base,index_sse,index_avx" << std::endl; 210 | } 211 | else 212 | { 213 | std::cout << "\nsize: 0x" << std::hex << std::setw(8) << std::setfill( '0') << runSize << std::endl << std::endl; 214 | } 215 | while( 1 ) 216 | { 217 | uint64_t base = bench< ls::vector< int32_t >, container_only, void >( "lower_bound .", runSize, loop ); 218 | uint64_t index1 = bench< ls::vector< int32_t >, nway_tree, ls::sse_tag >( "index SSE ...", runSize, loop ); 219 | #ifdef LITESIMD_HAS_AVX 220 | uint64_t index2 = bench< ls::vector< int32_t >, nway_tree, ls::avx_tag >( "index AVX ...", runSize, loop ); 221 | #endif // LITESIMD_HAS_AVX 222 | 223 | if( g_verbose ) 224 | { 225 | bench< ls::vector< int32_t >, map_index, void >( "std::map ....", runSize, loop ); 226 | 227 | std::cout 228 | << std::endl << "Index Speed up SSE.......: " << std::fixed << std::setprecision(2) 229 | << static_cast(base)/static_cast(index1) << "x" 230 | 231 | 
#ifdef LITESIMD_HAS_AVX 232 | << std::endl << "Index Speed up AVX.......: " << std::fixed << std::setprecision(2) 233 | << static_cast(base)/static_cast(index2) << "x" 234 | #endif // LITESIMD_HAS_AVX 235 | 236 | << std::endl << std::endl; 237 | } 238 | else 239 | { 240 | std::cout 241 | << base << "," 242 | << index1 << "," 243 | #ifdef LITESIMD_HAS_AVX 244 | << index2 245 | #endif // LITESIMD_HAS_AVX 246 | << std::endl; 247 | } 248 | } 249 | return 0; 250 | } 251 | -------------------------------------------------------------------------------- /include/litesimd/detail/arch/common/arithmetic.h: -------------------------------------------------------------------------------- 1 | // MIT License 2 | // 3 | // Copyright (c) 2018 André Tupinambá 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 

#ifndef LITESIMD_ARCH_COMMON_ARITHMETIC_H
#define LITESIMD_ARCH_COMMON_ARITHMETIC_H

// NOTE(review): the header name of the #include directive below is missing in
// this copy of the file; restore it from the upstream source.
#include

namespace litesimd {

// Basic operations
// ---------------------------------------------------------------------------------------
// The templates in this file are the generic fallbacks and have empty bodies.
// NOTE(review): presumably only the per-architecture specialisations are meant
// to be called. Calling one of these generic versions would flow off the end of
// a value-returning function, which is undefined behaviour - confirm that every
// supported type/tag combination is specialised.

/**
 * \ingroup arithmetic
 * \brief Adds each value inside the packed SIMD register.
 *
 * Each value inside the packed SIMD register is added independently and the result is
 * saved on the same index inside the packed SIMD register.
 *
 * | Index | 3 | 2 | 1 | 0 |
 * | :--- | :--: | :--: | :--: | :--: |
 * | Register X | a | b | c | d |
 * | Register Y | e | f | g | h |
 * | litesimd::add( X, Y ) | a + e | b + f | c + g | d + h |
 *
 * \param lhs, rhs SIMD registers to be added
 * \tparam ValueType_T Type of value inside packed SIMD register.
 * Only for signed numerical types (int8_t, int16_t, int32_t, int64_t, float and double)
 * \tparam Tag_T Metaprogramming tag for instruction set selection.
 * \returns SIMD register with the sum of each packed value
 *
 * **Example**
 * ```{.cpp}
 * #include <iostream>
 * #include <litesimd/types.h>
 * #include <litesimd/arithmetic.h>
 * #include <litesimd/helpers/iostream.h>
 *
 * int main()
 * {
 *     namespace ls = litesimd;
 *     ls::t_int32_simd a( 10 ), b( 1 );
 *     std::cout << "add( a, b ): " << ls::add( a, b ) << std::endl;
 *     return 0;
 * }
 * ```
 * Output on a SSE compilation
 * ```
 * add( a, b ): (11, 11, 11, 11)
 * ```
 *
 * \see sub
 */
template< typename ValueType_T, typename Tag_T = default_tag >
simd_type< ValueType_T, Tag_T >
add( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){}

/**
 * \ingroup arithmetic
 * \brief Subtracts each value inside the packed SIMD register.
 *
 * Each value inside the packed SIMD register is subtracted independently and the result is
 * saved on the same index inside the packed SIMD register.
 *
 * | Index | 3 | 2 | 1 | 0 |
 * | :--- | :--: | :--: | :--: | :--: |
 * | Register X | a | b | c | d |
 * | Register Y | e | f | g | h |
 * | litesimd::sub( X, Y ) | a - e | b - f | c - g | d - h |
 *
 * \param lhs, rhs SIMD registers to be subtracted
 * \tparam ValueType_T Type of value inside packed SIMD register.
 * Only for signed numerical types (int8_t, int16_t, int32_t, int64_t, float and double)
 * \tparam Tag_T Metaprogramming tag for instruction set selection.
 * \returns SIMD register with the difference of each packed value
 *
 * **Example**
 * ```{.cpp}
 * #include <iostream>
 * #include <litesimd/types.h>
 * #include <litesimd/arithmetic.h>
 * #include <litesimd/helpers/iostream.h>
 *
 * int main()
 * {
 *     namespace ls = litesimd;
 *     ls::t_int32_simd a( 10 ), b( 1 );
 *     std::cout << "sub( a, b ): " << ls::sub( a, b ) << std::endl;
 *     return 0;
 * }
 * ```
 * Output on a SSE compilation
 * ```
 * sub( a, b ): (9, 9, 9, 9)
 * ```
 *
 * \see add
 */
template< typename ValueType_T, typename Tag_T = default_tag >
simd_type< ValueType_T, Tag_T >
sub( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){}

/**
 * \ingroup arithmetic
 * \brief Multiplies each value inside the packed SIMD register and saves the least significant bits.
 *
 * Each value inside the packed SIMD register is multiplied independently and the
 * least significant bits of the result are saved on the same index inside the packed SIMD register.
 *
 * For 16 bits
 * | Index | 3 | 2 | 1 | 0 |
 * | :--- | :--: | :--: | :--: | :--: |
 * | Register X | a | b | c | d |
 * | Register Y | e | f | g | h |
 * | litesimd::mullo( X, Y ) | (a * e) & 0xffff | (b * f) & 0xffff | (c * g) & 0xffff | (d * h) & 0xffff |
 *
 * \param lhs, rhs SIMD registers to be multiplied
 * \tparam ValueType_T Type of value inside packed SIMD register. Only int16_t and int32_t are available for this function.
 * \tparam Tag_T Metaprogramming tag for instruction set selection.
 * \returns SIMD register with the least significant bits of the multiplication result
 *
 * **Example**
 * ```{.cpp}
 * #include <iostream>
 * #include <litesimd/types.h>
 * #include <litesimd/arithmetic.h>
 * #include <litesimd/helpers/iostream.h>
 *
 * int main()
 * {
 *     namespace ls = litesimd;
 *     ls::t_int16_simd a( 0x1002 ), b( 0x10 );
 *     std::cout << "mullo( a, b ): " << ls::mullo( a, b ) << std::endl;
 *     return 0;
 * }
 * ```
 * Output on a SSE compilation (0x1002 * 0x10 = 0x10020, low 16 bits = 0x0020)
 * ```
 * mullo( a, b ): (32, 32, 32, 32, 32, 32, 32, 32)
 * ```
 *
 * \remarks This function works only on int16_t and int32_t on Intel architecture
 * \see mulhi
 */
template< typename ValueType_T, typename Tag_T = default_tag >
simd_type< ValueType_T, Tag_T >
mullo( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){}

/**
 * \ingroup arithmetic
 * \brief Multiplies each value inside the packed SIMD register and saves the most significant bits.
 *
 * Each value inside the packed SIMD register is multiplied independently and the
 * most significant bits of the result are saved on the same index inside the packed SIMD register.
 *
 * For 16 bits
 * | Index | 3 | 2 | 1 | 0 |
 * | :--- | :--: | :--: | :--: | :--: |
 * | Register X | a | b | c | d |
 * | Register Y | e | f | g | h |
 * | litesimd::mulhi( X, Y ) | (a * e) >> 16 | (b * f) >> 16 | (c * g) >> 16 | (d * h) >> 16 |
 *
 * \param lhs, rhs SIMD registers to be multiplied
 * \tparam ValueType_T Type of value inside packed SIMD register. Only int16_t is available for this function.
 * \tparam Tag_T Metaprogramming tag for instruction set selection.
 * \returns SIMD register with the most significant bits of the multiplication result
 *
 * **Example**
 * ```{.cpp}
 * #include <iostream>
 * #include <litesimd/types.h>
 * #include <litesimd/arithmetic.h>
 * #include <litesimd/helpers/iostream.h>
 *
 * int main()
 * {
 *     namespace ls = litesimd;
 *     ls::t_int16_simd a( 0x1002 ), b( 0x10 );
 *     std::cout << "mulhi( a, b ): " << ls::mulhi( a, b ) << std::endl;
 *     return 0;
 * }
 * ```
 * Output on a SSE compilation (0x1002 * 0x10 = 0x10020, and 0x10020 >> 16 = 1)
 * ```
 * mulhi( a, b ): (1, 1, 1, 1, 1, 1, 1, 1)
 * ```
 *
 * \remarks This function works only on int16_t on Intel architecture
 * \see mullo
 */
template< typename ValueType_T, typename Tag_T = default_tag >
simd_type< ValueType_T, Tag_T >
mulhi( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){}

/**
 * \ingroup arithmetic
 * \brief Generic declaration of the packed division.
 *
 * NOTE(review): presumably divides each value of lhs by the value on the same
 * index of rhs, mirroring add and sub. Which value types are supported depends
 * on the architecture specialisations, which are not part of this file - confirm.
 *
 * \param lhs, rhs SIMD registers: dividend and divisor
 * \tparam ValueType_T Type of value inside packed SIMD register.
 * \tparam Tag_T Metaprogramming tag for instruction set selection.
 */
template< typename ValueType_T, typename Tag_T = default_tag >
simd_type< ValueType_T, Tag_T >
div( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){}

} // namespace litesimd

#endif // LITESIMD_ARCH_COMMON_ARITHMETIC_H

--------------------------------------------------------------------------------
/include/litesimd/detail/arch/sse/shuffle.h:
--------------------------------------------------------------------------------
// MIT License
//
// Copyright (c) 2018 André Tupinambá
//
// Permission is hereby
granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 

#ifndef LITESIMD_SSE_SHUFFLE_H
#define LITESIMD_SSE_SHUFFLE_H

#ifdef LITESIMD_HAS_SSE

// NOTE(review): the header names of the three #include directives below are
// missing in this copy of the file; restore them from the upstream source.
#include
#include
#include

namespace litesimd {

// Blend
// ---------------------------------------------------------------------------------------
// Lane-wise select: trueVal where the mask is set, falseVal elsewhere. The
// blendv intrinsics take the "mask clear" operand first, hence the swapped
// argument order. Every integer width shares the byte-granular blend.
#define DEF_BLEND( TYPE_T, BLEND_CMD ) \
template<> inline simd_type< TYPE_T, sse_tag > \
blend< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > mask, \
                          simd_type< TYPE_T, sse_tag > trueVal, \
                          simd_type< TYPE_T, sse_tag > falseVal ) \
{ \
    return BLEND_CMD( falseVal, trueVal, mask ); \
}

DEF_BLEND( int8_t,  _mm_blendv_epi8 )
DEF_BLEND( int16_t, _mm_blendv_epi8 )
DEF_BLEND( int32_t, _mm_blendv_epi8 )
DEF_BLEND( int64_t, _mm_blendv_epi8 )
DEF_BLEND( float,   _mm_blendv_ps )
DEF_BLEND( double,  _mm_blendv_pd )
#undef DEF_BLEND

// Get
// ---------------------------------------------------------------------------------------
// Reads the lane selected by the compile time index.
template< int index >
struct get_functor< index, int8_t, sse_tag >
{
    inline int8_t operator()( simd_type< int8_t, sse_tag > vec )
    {
        return static_cast< int8_t >( _mm_extract_epi8( vec, index ) );
    }
};

template< int index >
struct get_functor< index, int16_t, sse_tag >
{
    inline int16_t operator()( simd_type< int16_t, sse_tag > vec )
    {
        return static_cast< int16_t >( _mm_extract_epi16( vec, index ) );
    }
};

template< int index >
struct get_functor< index, int32_t, sse_tag >
{
    inline int32_t operator()( simd_type< int32_t, sse_tag > vec )
    {
        return static_cast< int32_t >( _mm_extract_epi32( vec, index ) );
    }
};

template< int index >
struct get_functor< index, int64_t, sse_tag >
{
    inline int64_t operator()( simd_type< int64_t, sse_tag > vec )
    {
        return static_cast< int64_t >( _mm_extract_epi64( vec, index ) );
    }
};

template< int index >
struct get_functor< index, float, sse_tag >
{
    inline float operator()( simd_type< float, sse_tag > vec )
    {
        // With the index as shuffle immediate, the wanted lane lands in lane 0,
        // which is the lane the scalar conversion reads.
        const __m128 rotated = _mm_shuffle_ps( vec, vec, index );
        return _mm_cvtss_f32( rotated );
    }
};

template< int index >
struct get_functor< index, double, sse_tag >
{
    inline double operator()( simd_type< double, sse_tag > vec )
    {
        // Same trick as the float case, on two lanes.
        const __m128d rotated = _mm_shuffle_pd( vec, vec, index );
        return _mm_cvtsd_f64( rotated );
    }
};

// Set
// ---------------------------------------------------------------------------------------
// Returns a copy of vec with the lane selected by the compile time index
// replaced by val.
template< int index >
struct set_functor< index, int8_t, sse_tag >
{
    inline simd_type< int8_t, sse_tag >
    operator()( simd_type< int8_t, sse_tag > vec, int8_t val )
    {
        return _mm_insert_epi8( vec, val, index );
    }
};

template< int index >
struct set_functor< index, int16_t, sse_tag >
{
    inline simd_type< int16_t, sse_tag >
    operator()( simd_type< int16_t, sse_tag > vec, int16_t val )
    {
        return _mm_insert_epi16( vec, val, index );
    }
};

template< int index >
struct set_functor< index, int32_t, sse_tag >
{
    inline simd_type< int32_t, sse_tag >
    operator()( simd_type< int32_t, sse_tag > vec, int32_t val )
    {
        return _mm_insert_epi32( vec, val, index );
    }
};

template< int index >
struct set_functor< index, int64_t, sse_tag >
{
    inline simd_type< int64_t, sse_tag >
    operator()( simd_type< int64_t, sse_tag > vec, int64_t val )
    {
        return _mm_insert_epi64( vec, val, index );
    }
};

template< int index >
struct set_functor< index, float, sse_tag >
{
    inline simd_type< float, sse_tag >
    operator()( simd_type< float, sse_tag > vec, float val )
    {
        // The immediate evaluates to index << 4: destination lane = index,
        // source lane = 0, no lane zeroed.
        const __m128 broadcast = _mm_set1_ps( val );
        return _mm_insert_ps( vec, broadcast, _MM_SHUFFLE( 0, index, 0, 0 ) );
    }
};

template<>
struct set_functor< 0, double, sse_tag >
{
    inline simd_type< double, sse_tag >
    operator()( simd_type< double, sse_tag > vec, double val )
    {
        // Low lane from the broadcast value, high lane from the high lane of vec.
        const __m128d broadcast = _mm_set1_pd( val );
        return _mm_shuffle_pd( broadcast, vec, 2 );
    }
};

template<>
struct set_functor< 1, double, sse_tag >
{
    inline simd_type< double, sse_tag >
    operator()( simd_type< double, sse_tag > vec, double val )
    {
        // Low lane from the low lane of vec, high lane from the broadcast value.
        const __m128d broadcast = _mm_set1_pd( val );
        return _mm_shuffle_pd( vec, broadcast, 0 );
    }
};

// High insert
// ---------------------------------------------------------------------------------------
// Moves every lane one position down (the lowest lane is dropped) and writes
// val into the highest lane.
template<> inline simd_type< int8_t, sse_tag >
high_insert< int8_t, sse_tag >( simd_type< int8_t, sse_tag > vec, int8_t val )
{
    const __m128i lowered = _mm_srli_si128( vec, 1 );
    return set_functor<15, int8_t, sse_tag>()( lowered, val );
}

template<> inline simd_type< int16_t, sse_tag >
high_insert< int16_t, sse_tag >( simd_type< int16_t, sse_tag > vec, int16_t val )
{
    const __m128i lowered = _mm_srli_si128( vec, 2 );
    return set_functor<7, int16_t, sse_tag>()( lowered, val );
}

template<> inline simd_type< int32_t, sse_tag >
high_insert< int32_t, sse_tag >( simd_type< int32_t, sse_tag > vec, int32_t val )
{
    // Lanes become (v3, v3, v2, v1); the duplicated top lane is overwritten next.
    const __m128i lowered = _mm_shuffle_epi32( vec, _MM_SHUFFLE( 3, 3, 2, 1 ) );
    return set_functor<3, int32_t, sse_tag>()( lowered, val );
}

template<> inline simd_type< int64_t, sse_tag >
high_insert< int64_t, sse_tag >( simd_type< int64_t, sse_tag > vec, int64_t val )
{
    // In 32-bit lanes: the high 64-bit half moves to the low half.
    const __m128i lowered = _mm_shuffle_epi32( vec, _MM_SHUFFLE( 3, 3, 3, 2 ) );
    return set_functor<1, int64_t, sse_tag>()( lowered, val );
}

template<> inline simd_type< float, sse_tag >
high_insert< float, sse_tag >( simd_type< float, sse_tag > vec,
                               float val )
{
    const __m128 lowered = _mm_shuffle_ps( vec, vec, _MM_SHUFFLE( 3, 3, 2, 1 ) );
    return set_functor<3, float, sse_tag>()( lowered, val );
}

template<> inline simd_type< double, sse_tag >
high_insert< double, sse_tag >( simd_type< double, sse_tag > vec,
                                double val )
{
    // Immediate 1: the high lane of vec moves to the low lane.
    const __m128d lowered = _mm_shuffle_pd( vec, vec, 1 );
    return set_functor<1, double, sse_tag>()( lowered, val );
}

// Low insert
// ---------------------------------------------------------------------------------------
// Moves every lane one position up (the highest lane is dropped) and writes
// val into the lowest lane.
template<> inline simd_type< int8_t, sse_tag >
low_insert< int8_t, sse_tag >( simd_type< int8_t, sse_tag > vec, int8_t val )
{
    const __m128i raised = _mm_slli_si128( vec, 1 );
    return set_functor<0, int8_t, sse_tag>()( raised, val );
}

template<> inline simd_type< int16_t, sse_tag >
low_insert< int16_t, sse_tag >( simd_type< int16_t, sse_tag > vec, int16_t val )
{
    const __m128i raised = _mm_slli_si128( vec, 2 );
    return set_functor<0, int16_t, sse_tag>()( raised, val );
}

template<> inline simd_type< int32_t, sse_tag >
low_insert< int32_t, sse_tag >( simd_type< int32_t, sse_tag > vec, int32_t val )
{
    // Lanes become (v2, v1, v0, v0); the duplicated bottom lane is overwritten next.
    const __m128i raised = _mm_shuffle_epi32( vec, _MM_SHUFFLE( 2, 1, 0, 0 ) );
    return set_functor<0, int32_t, sse_tag>()( raised, val );
}

template<> inline simd_type< int64_t, sse_tag >
low_insert< int64_t, sse_tag >( simd_type< int64_t, sse_tag > vec, int64_t val )
{
    // In 32-bit lanes: the low 64-bit half moves to the high half.
    const __m128i raised = _mm_shuffle_epi32( vec, _MM_SHUFFLE( 1, 0, 0, 0 ) );
    return set_functor<0, int64_t, sse_tag>()( raised, val );
}

template<> inline simd_type< float, sse_tag >
low_insert< float, sse_tag >( simd_type< float, sse_tag > vec,
                              float val )
{
    const __m128 raised = _mm_shuffle_ps( vec, vec, _MM_SHUFFLE( 2, 1, 0, 0 ) );
    return set_functor<0, float, sse_tag>()( raised, val );
}

template<> inline simd_type< double, sse_tag >
low_insert< double, sse_tag >( simd_type< double, sse_tag > vec,
                               double val )
{
    // Immediate 0: the low lane of vec is duplicated into the high lane.
    const __m128d raised = _mm_shuffle_pd( vec, vec, 0 );
    return set_functor<0, double, sse_tag>()( raised, val );
}

} // namespace litesimd

#endif // LITESIMD_HAS_SSE
#endif // LITESIMD_SSE_SHUFFLE_H
--------------------------------------------------------------------------------