├── .codedocs
├── samples
    ├── CMakeLists.txt
    ├── greater
    │   ├── CMakeLists.txt
    │   └── greater.cpp
    ├── nway_tree
    │   ├── CMakeLists.txt
    │   ├── do_nothing.cpp
    │   └── nway_tree.cpp
    ├── to_lower
    │   ├── CMakeLists.txt
    │   └── do_nothing.cpp
    ├── bubble_sort
    │   └── CMakeLists.txt
    ├── binary_search
    │   ├── CMakeLists.txt
    │   └── do_nothing.cpp
    └── boyer_moore_horspool
    │   ├── CMakeLists.txt
    │   └── do_nothing.cpp
├── .gitignore
├── CMakeLists.txt
├── doc
    └── CMakeLists.txt
├── LICENSE
├── appveyor.yml
├── test
    ├── main.cpp
    ├── CMakeLists.txt
    ├── shuffle.cpp
    ├── bitwise.cpp
    ├── intravector.cpp
    └── arithmetic.cpp
├── include
    └── litesimd
    │   ├── detail
    │       ├── arch
    │       │   ├── intravector.h
    │       │   ├── bitwise.h
    │       │   ├── compare.h
    │       │   ├── shuffle.h
    │       │   ├── traits.h
    │       │   ├── arithmetic.h
    │       │   ├── avx
    │       │   │   ├── tag.h
    │       │   │   ├── detail
    │       │   │   │   └── compatibility.h
    │       │   │   ├── algorithm.h
    │       │   │   ├── bitwise.h
    │       │   │   ├── arithmetic.h
    │       │   │   ├── intravector.h
    │       │   │   ├── traits.h
    │       │   │   └── compare.h
    │       │   ├── sse
    │       │   │   ├── tag.h
    │       │   │   ├── algorithm.h
    │       │   │   ├── bitwise.h
    │       │   │   ├── arithmetic.h
    │       │   │   ├── intravector.h
    │       │   │   ├── traits.h
    │       │   │   ├── compare.h
    │       │   │   └── shuffle.h
    │       │   ├── common
    │       │   │   ├── traits.h
    │       │   │   ├── intravector.h
    │       │   │   ├── arch.h
    │       │   │   ├── shuffle.h
    │       │   │   ├── algorithm.h
    │       │   │   ├── bitwise.h
    │       │   │   ├── compare.h
    │       │   │   └── arithmetic.h
    │       │   └── tag.h
    │       └── helper_macros.h
    │   ├── algorithm.h
    │   ├── helpers
    │       ├── containers.h
    │       └── iostream.h
    │   ├── algorithm
    │       ├── detail
    │       │   └── for_each_loop.h
    │       ├── iota.h
    │       └── minmax.h
    │   ├── intravector.h
    │   ├── arithmetic.h
    │   ├── types.h
    │   └── shuffle.h
├── .travis.yml
└── README.md


/.codedocs:
--------------------------------------------------------------------------------
1 | DOXYFILE = doc/litesimd.doxy
2 | INPUT = include
3 | 


--------------------------------------------------------------------------------
/samples/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | # The samples need Boost (timer, chrono, system); without it no sample directory is added.
 3 | find_package(Boost 1.56.0 COMPONENTS timer chrono system)
 4 | 
 5 | if(Boost_FOUND)
 6 |     # One subdirectory per sample program.
 7 |     foreach(sample_dir
 8 |             binary_search
 9 |             boyer_moore_horspool
10 |             bubble_sort
11 |             greater
12 |             nway_tree
13 |             to_lower)
14 |         add_subdirectory(${sample_dir})
15 |     endforeach()
16 | endif()
17 | 


--------------------------------------------------------------------------------
/samples/greater/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Sample executable "greater".
 2 | project(greater)
 3 | 
 4 | # Every source file found in this directory is compiled into the sample.
 5 | aux_source_directory(. SRC_LIST)
 6 | add_executable(${PROJECT_NAME} ${SRC_LIST})
 7 | 
 8 | # An executable has no consumers, so its include paths are PRIVATE.
 9 | # SYSTEM marks the Boost headers as system includes.
10 | target_include_directories(${PROJECT_NAME} SYSTEM PRIVATE
11 |     ${Boost_INCLUDE_DIRS}
12 | )
13 | 
14 | target_link_libraries(${PROJECT_NAME} PRIVATE
15 |     litesimd
16 |     ${Boost_LIBRARIES}
17 | )
18 | 
19 | # Require C++11 outright instead of letting CMake fall back to an older standard.
20 | set_target_properties(${PROJECT_NAME} PROPERTIES
21 |     CXX_STANDARD 11
22 |     CXX_STANDARD_REQUIRED ON
23 | )
24 | 


--------------------------------------------------------------------------------
/samples/nway_tree/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Sample executable "nway_tree".
 2 | project(nway_tree)
 3 | 
 4 | # Every source file found in this directory is compiled into the sample.
 5 | aux_source_directory(. SRC_LIST)
 6 | add_executable(${PROJECT_NAME} ${SRC_LIST})
 7 | 
 8 | # An executable has no consumers, so its include paths are PRIVATE.
 9 | # SYSTEM marks the Boost headers as system includes.
10 | target_include_directories(${PROJECT_NAME} SYSTEM PRIVATE
11 |     ${Boost_INCLUDE_DIRS}
12 | )
13 | 
14 | target_link_libraries(${PROJECT_NAME} PRIVATE
15 |     litesimd
16 |     ${Boost_LIBRARIES}
17 | )
18 | 
19 | # Require C++11 outright instead of letting CMake fall back to an older standard.
20 | set_target_properties(${PROJECT_NAME} PROPERTIES
21 |     CXX_STANDARD 11
22 |     CXX_STANDARD_REQUIRED ON
23 | )
24 | 


--------------------------------------------------------------------------------
/samples/to_lower/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Sample executable "to_lower".
 2 | project(to_lower)
 3 | 
 4 | # Every source file found in this directory is compiled into the sample.
 5 | aux_source_directory(. SRC_LIST)
 6 | add_executable(${PROJECT_NAME} ${SRC_LIST})
 7 | 
 8 | # An executable has no consumers, so its include paths are PRIVATE.
 9 | # SYSTEM marks the Boost headers as system includes.
10 | target_include_directories(${PROJECT_NAME} SYSTEM PRIVATE
11 |     ${Boost_INCLUDE_DIRS}
12 | )
13 | 
14 | target_link_libraries(${PROJECT_NAME} PRIVATE
15 |     litesimd
16 |     ${Boost_LIBRARIES}
17 | )
18 | 
19 | # Require C++11 outright instead of letting CMake fall back to an older standard.
20 | set_target_properties(${PROJECT_NAME} PROPERTIES
21 |     CXX_STANDARD 11
22 |     CXX_STANDARD_REQUIRED ON
23 | )
24 | 


--------------------------------------------------------------------------------
/samples/bubble_sort/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Sample executable "bubble_sort".
 2 | project(bubble_sort)
 3 | 
 4 | # Every source file found in this directory is compiled into the sample.
 5 | aux_source_directory(. SRC_LIST)
 6 | add_executable(${PROJECT_NAME} ${SRC_LIST})
 7 | 
 8 | # An executable has no consumers, so its include paths are PRIVATE.
 9 | # SYSTEM marks the Boost headers as system includes.
10 | target_include_directories(${PROJECT_NAME} SYSTEM PRIVATE
11 |     ${Boost_INCLUDE_DIRS}
12 | )
13 | 
14 | target_link_libraries(${PROJECT_NAME} PRIVATE
15 |     litesimd
16 |     ${Boost_LIBRARIES}
17 | )
18 | 
19 | # Require C++11 outright instead of letting CMake fall back to an older standard.
20 | set_target_properties(${PROJECT_NAME} PROPERTIES
21 |     CXX_STANDARD 11
22 |     CXX_STANDARD_REQUIRED ON
23 | )
24 | 


--------------------------------------------------------------------------------
/samples/binary_search/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Sample executable "binary_search".
 2 | project(binary_search)
 3 | 
 4 | # Every source file found in this directory is compiled into the sample.
 5 | aux_source_directory(. SRC_LIST)
 6 | add_executable(${PROJECT_NAME} ${SRC_LIST})
 7 | 
 8 | # An executable has no consumers, so its include paths are PRIVATE.
 9 | # SYSTEM marks the Boost headers as system includes.
10 | target_include_directories(${PROJECT_NAME} SYSTEM PRIVATE
11 |     ${Boost_INCLUDE_DIRS}
12 | )
13 | 
14 | target_link_libraries(${PROJECT_NAME} PRIVATE
15 |     litesimd
16 |     ${Boost_LIBRARIES}
17 | )
18 | 
19 | # Require C++11 outright instead of letting CMake fall back to an older standard.
20 | set_target_properties(${PROJECT_NAME} PROPERTIES
21 |     CXX_STANDARD 11
22 |     CXX_STANDARD_REQUIRED ON
23 | )
24 | 


--------------------------------------------------------------------------------
/samples/boyer_moore_horspool/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Sample executable "boyer_moore_horspool".
 2 | project(boyer_moore_horspool)
 3 | 
 4 | # Every source file found in this directory is compiled into the sample.
 5 | aux_source_directory(. SRC_LIST)
 6 | add_executable(${PROJECT_NAME} ${SRC_LIST})
 7 | 
 8 | # An executable has no consumers, so its include paths are PRIVATE.
 9 | # SYSTEM marks the Boost headers as system includes.
10 | target_include_directories(${PROJECT_NAME} SYSTEM PRIVATE
11 |     ${Boost_INCLUDE_DIRS}
12 | )
13 | 
14 | target_link_libraries(${PROJECT_NAME} PRIVATE
15 |     litesimd
16 |     ${Boost_LIBRARIES}
17 | )
18 | 
19 | # Require C++11 outright instead of letting CMake fall back to an older standard.
20 | set_target_properties(${PROJECT_NAME} PROPERTIES
21 |     CXX_STANDARD 11
22 |     CXX_STANDARD_REQUIRED ON
23 | )
24 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Prerequisites
 2 | *.d
 3 | 
 4 | # Compiled Object files
 5 | *.slo
 6 | *.lo
 7 | *.o
 8 | *.obj
 9 | 
10 | # Precompiled Headers
11 | *.gch
12 | *.pch
13 | 
14 | # Compiled Dynamic libraries
15 | *.so
16 | *.dylib
17 | *.dll
18 | 
19 | # Fortran module files
20 | *.mod
21 | *.smod
22 | 
23 | # Compiled Static libraries
24 | *.lai
25 | *.la
26 | *.a
27 | *.lib
28 | 
29 | # Executables
30 | *.exe
31 | *.out
32 | *.app
33 | 
34 | # vim
35 | *.swp
36 | 
37 | # Build directory and other local artifacts
38 | build/
39 | compile_commands.json
40 | ID
41 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # cmake_minimum_required() has to run before project() so that the policy
 2 | # defaults are in place when the project is declared.
 3 | cmake_minimum_required(VERSION 3.5)
 4 | project(litesimd)
 5 | 
 6 | # Host-specific code generation stays on by default (same flags as before);
 7 | # switch it off when cross-compiling or producing redistributable binaries.
 8 | option(LITESIMD_NATIVE_ARCH "Pass -march=native -mtune=native to non-MSVC compilers" ON)
 9 | 
10 | # Directory-wide compiler flags; they also apply to the doc, samples and
11 | # test subdirectories added at the end of this file.
12 | if(MSVC)
13 |     add_compile_options(
14 |         "/W4"
15 |         "$<$<CONFIG:RELEASE>:/O2>"
16 |         "$<$<CONFIG:RELEASE>:/Oi>"
17 |         "/MD$<$<CONFIG:Debug>:d>"
18 |     )
19 | else()
20 |     add_compile_options(
21 |         "-Wall"
22 |         "-fno-strict-aliasing"
23 |     )
24 |     if(LITESIMD_NATIVE_ARCH)
25 |         add_compile_options(
26 |             "-march=native"
27 |             "-mtune=native"
28 |         )
29 |     endif()
30 | endif()
31 | 
32 | # Ask CMake to emit compile_commands.json in the build tree.
33 | set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
34 | 
35 | enable_testing()
36 | 
37 | # litesimd is declared as an INTERFACE (header-only) target: it only carries
38 | # usage requirements, namely one compile definition and the include directory.
39 | add_library(${PROJECT_NAME} INTERFACE)
40 | target_compile_definitions(${PROJECT_NAME} INTERFACE LIBRARY_HEADER_ONLY)
41 | target_include_directories(${PROJECT_NAME} INTERFACE include)
42 | install(DIRECTORY include/ DESTINATION include)
43 | 
44 | add_subdirectory(doc)
45 | add_subdirectory(samples)
46 | add_subdirectory(test)
47 | 


--------------------------------------------------------------------------------
/doc/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Optional "doc" target: runs Doxygen on a configured Doxyfile and installs the output.
 2 | find_package(Doxygen)
 3 | option(BUILD_DOCUMENTATION "Create and install the HTML based API documentation (requires Doxygen)" ${DOXYGEN_FOUND})
 4 | 
 5 | # Nothing to set up when documentation is switched off.
 6 | if(NOT BUILD_DOCUMENTATION)
 7 |     return()
 8 | endif()
 9 | 
10 | # The option can be turned on by hand even though Doxygen was not found; stop here in that case.
11 | if(NOT DOXYGEN_FOUND)
12 |     message(FATAL_ERROR "Doxygen is needed to build the documentation.")
13 | endif()
14 | 
15 | # Instantiate the Doxygen configuration template in the build tree (@VAR@ substitution only).
16 | set(doxy_template ${CMAKE_CURRENT_SOURCE_DIR}/litesimd.doxy)
17 | set(doxy_config ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile)
18 | configure_file(${doxy_template} ${doxy_config} @ONLY)
19 | 
20 | # ALL makes the target part of the default build; Doxygen runs from the build directory.
21 | add_custom_target(doc ALL
22 |     COMMAND ${DOXYGEN_EXECUTABLE} ${doxy_config}
23 |     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
24 |     COMMENT "Generating litesimd documentation with Doxygen"
25 |     VERBATIM)
26 | 
27 | # NOTE(review): assumes litesimd.doxy writes its output to <build dir>/litesimd - confirm against the Doxyfile.
28 | install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/litesimd DESTINATION share/doc)
29 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 André Tupinambá
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/samples/nway_tree/do_nothing.cpp:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #include <stdint.h> 
24 | 
25 | // Deliberately empty: accepts a 32-bit integer and discards it.
26 | // NOTE(review): presumably an out-of-line sink so the sample's result is not optimized away - confirm against the caller.
27 | void do_nothing( int32_t ) {}
28 | 


--------------------------------------------------------------------------------
/samples/binary_search/do_nothing.cpp:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #include <stdint.h>
24 | 
25 | // Deliberately empty: accepts a 32-bit integer and discards it.
26 | // NOTE(review): presumably an out-of-line sink so the sample's result is not optimized away - confirm against the caller.
27 | void do_nothing( int32_t ) {}
28 | 


--------------------------------------------------------------------------------
/samples/boyer_moore_horspool/do_nothing.cpp:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #include <cstddef>
24 | 
25 | // Deliberately empty: accepts a size_t and discards it.
26 | // NOTE(review): presumably an out-of-line sink so the sample's result is not optimized away - confirm against the caller.
27 | void do_nothing( size_t ) {}
28 | 


--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
 1 | version: 0.1.0.{build}
 2 | skip_tags: true
 3 | image:
 4 |     - Visual Studio 2017
 5 |     - Visual Studio 2015
 6 | platform: x64
 7 | configuration: Release
 8 | clone_folder: c:\projects\litesimd
 9 | 
10 | environment:
11 |     matrix:
12 |     - arch: Win64
13 | matrix:
14 |     fast_finish: true
15 | 
16 | # Choose the CMake generator from the worker image (note: "set arch=" below clears arch, so the Win64 test never matches)
17 | init:
18 |     - set arch=
19 |     - if "%arch%"=="Win64" ( set arch= Win64)
20 |     - echo %arch%
21 |     - echo %APPVEYOR_BUILD_WORKER_IMAGE%
22 |     - if "%APPVEYOR_BUILD_WORKER_IMAGE%"=="Visual Studio 2017" ( set generator="Visual Studio 15 2017" )
23 |     - if "%APPVEYOR_BUILD_WORKER_IMAGE%"=="Visual Studio 2015" ( set generator="Visual Studio 14 2015" )
24 |     - echo %generator%
25 | 
26 | before_build:
27 |     - cmd: |-
28 |         mkdir build
29 |         cd build
30 |         cmake --version
31 |         cmake -G %generator% -DCMAKE_CXX_FLAGS="/EHsc /D BOOST_ALL_DYN_LINK /D WIN32_LEAN_AND_MEAN" -DBOOST_ROOT=C:\Libraries\boost_1_65_1 -DCMAKE_BUILD_TYPE=Release -DCMAKE_GENERATOR_PLATFORM=x64 ..
32 | build:
33 |     project: c:\projects\litesimd\build\litesimd.sln
34 |     verbosity: normal
35 |     parallel: true
36 | only_commits:
37 |   files:
38 |       - CMakeLists.txt
39 |       - appveyor.yml
40 |       - include/
41 |       - samples/
42 |       - test/
43 | 


--------------------------------------------------------------------------------
/samples/to_lower/do_nothing.cpp:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #include <string>
24 | #include <litesimd/helpers/containers.h>
25 | // Empty function: takes a litesimd::string by const reference and ignores it.
26 | void do_nothing( const litesimd::string& ) {}
27 | 


--------------------------------------------------------------------------------
/test/main.cpp:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #include "gtest/gtest.h"
24 | 
25 | // Test-runner entry point: hands the command line to Google Test and uses the RUN_ALL_TESTS() result as the exit code.
26 | int main( int argc, char** argv )
27 | {
28 |     ::testing::InitGoogleTest( &argc, argv );
29 |     return RUN_ALL_TESTS();
30 | }
31 | 
32 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/intravector.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_ARCH_INTRAVECTOR_H
24 | #define LITESIMD_ARCH_INTRAVECTOR_H
25 | 
26 | #include <litesimd/detail/arch/sse/intravector.h>
27 | #include <litesimd/detail/arch/avx/intravector.h>
28 | 
29 | #endif // LITESIMD_ARCH_INTRAVECTOR_H
30 | 
31 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/bitwise.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_ARCH_BITWISE_H
24 | #define LITESIMD_ARCH_BITWISE_H
25 | 
26 | #include <litesimd/types.h>
27 | #include <litesimd/detail/arch/sse/bitwise.h>
28 | #include <litesimd/detail/arch/avx/bitwise.h>
29 | 
30 | #endif // LITESIMD_ARCH_BITWISE_H
31 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/compare.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_ARCH_COMPARE_H
24 | #define LITESIMD_ARCH_COMPARE_H
25 | 
26 | #include <litesimd/types.h>
27 | #include <litesimd/detail/arch/sse/compare.h>
28 | #include <litesimd/detail/arch/avx/compare.h>
29 | 
30 | #endif // LITESIMD_ARCH_COMPARE_H
31 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/shuffle.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_ARCH_SHUFFLE_H
24 | #define LITESIMD_ARCH_SHUFFLE_H
25 | 
26 | #include <litesimd/types.h>
27 | #include <litesimd/detail/arch/sse/shuffle.h>
28 | #include <litesimd/detail/arch/avx/shuffle.h>
29 | 
30 | #endif // LITESIMD_ARCH_SHUFFLE_H
31 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/traits.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_ARCH_TRAITS_H
24 | #define LITESIMD_ARCH_TRAITS_H
25 | 
26 | #include <litesimd/detail/arch/tag.h>
27 | #include <litesimd/detail/arch/sse/traits.h>
28 | #include <litesimd/detail/arch/avx/traits.h>
29 | 
30 | #endif // LITESIMD_ARCH_TRAITS_H
31 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/arithmetic.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_ARCH_ARITHMETIC_H
24 | #define LITESIMD_ARCH_ARITHMETIC_H
25 | 
26 | #include <litesimd/types.h>
27 | #include <litesimd/detail/arch/sse/arithmetic.h>
28 | #include <litesimd/detail/arch/avx/arithmetic.h>
29 | 
30 | #endif // LITESIMD_ARCH_ARITHMETIC_H
31 | 
32 | 
33 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/avx/tag.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_AVX_TAG_H
24 | #define LITESIMD_AVX_TAG_H
25 | 
26 | #include <litesimd/detail/arch/common/arch.h>
27 | 
28 | #if defined(LITESIMD_HAS_AVX)
29 | 
30 | namespace litesimd {
31 | 
32 | /// Empty tag type naming the AVX backend; declared only when LITESIMD_HAS_AVX is defined.
33 | struct avx_tag {};
34 | } // namespace litesimd
35 | 
36 | #endif // LITESIMD_HAS_AVX
37 | 
38 | #endif //LITESIMD_AVX_TAG_H
39 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/sse/tag.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_SSE_TAG_H
24 | #define LITESIMD_SSE_TAG_H
25 | 
26 | #include <litesimd/detail/arch/common/arch.h>
27 | 
28 | #if defined(LITESIMD_HAS_SSE)
29 | 
30 | namespace litesimd {
31 | 
32 | /// Empty tag type naming the SSE backend; declared only when LITESIMD_HAS_SSE is defined.
33 | struct sse_tag {};
34 | 
35 | } // namespace litesimd
36 | 
37 | #endif // LITESIMD_HAS_SSE
38 | 
39 | #endif //LITESIMD_SSE_TAG_H
40 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/common/traits.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_COMMON_TRAITS_H
24 | #define LITESIMD_COMMON_TRAITS_H
25 | 
26 | #include <litesimd/detail/arch/tag.h>
27 | 
28 | namespace litesimd {
29 | // Primary traits template: it has no members, and Tag_T falls back to default_tag when omitted.
30 | template< typename ValueType_T,
31 |           typename Tag_T = default_tag > struct traits {};
32 | 
33 | } // namespace litesimd
34 | 
35 | #endif //LITESIMD_COMMON_TRAITS_H
36 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/tag.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_ARCH_TAG_H
24 | #define LITESIMD_ARCH_TAG_H
25 | 
26 | #include <litesimd/detail/arch/sse/tag.h>
27 | #include <litesimd/detail/arch/avx/tag.h>
28 | 
29 | namespace litesimd {
30 | 
31 | // default_tag names the backend tag chosen at compile time: avx_tag if
32 | // LITESIMD_HAS_AVX is defined, otherwise sse_tag if LITESIMD_HAS_SSE is defined.
33 | // With neither macro defined, default_tag is not declared at all.
34 | #if defined(LITESIMD_HAS_AVX)
35 | typedef avx_tag default_tag;
36 | #elif defined(LITESIMD_HAS_SSE)
37 | typedef sse_tag default_tag;
38 | #endif // LITESIMD_HAS_AVX / LITESIMD_HAS_SSE
39 | 
40 | 
41 | 
42 | } // namespace litesimd
43 | 
44 | #endif // LITESIMD_ARCH_TAG_H
45 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/avx/detail/compatibility.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_AVX_DETAIL_COMPATIBILITY_H
24 | #define LITESIMD_AVX_DETAIL_COMPATIBILITY_H
25 | #include <litesimd/detail/arch/common/arch.h> // defines LITESIMD_HAS_AVX, which is tested on the next line
26 | #ifdef LITESIMD_HAS_AVX
27 | #include <immintrin.h> // declares __m256 / __m256d and the intrinsics used below
28 | #if defined(__GNUC__) && (__GNUC__ < 7) && !defined(__clang__)
29 | // GCC older than 7 (clang excluded): read element 0 through the low 128-bit half. NOTE(review): presumably these two intrinsics are missing from those GCC headers - confirm.
30 | static inline float _mm256_cvtss_f32( __m256 vec ) { return _mm_cvtss_f32( _mm256_extractf128_ps( vec, 0 ) ); }
31 | static inline double _mm256_cvtsd_f64( __m256d vec ) { return _mm_cvtsd_f64( _mm256_extractf128_pd( vec, 0 ) ); }
32 | #endif
33 | 
34 | #endif // LITESIMD_HAS_AVX
35 | #endif // LITESIMD_AVX_DETAIL_COMPATIBILITY_H
36 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/common/intravector.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_ARCH_COMMON_INTRAVECTOR_H
24 | #define LITESIMD_ARCH_COMMON_INTRAVECTOR_H
25 | 
26 | #include <litesimd/types.h>
27 | 
28 | namespace litesimd {
29 | 
30 | // Generic intravector operation: primary template whose call operator takes a SIMD value and a callable.
31 | // ---------------------------------------------------------------------------------------
32 | template< typename ValueType_T, typename Tag_T >
33 | struct intravector_op
34 | {
35 |     template< typename Function_T >
36 |     inline simd_type< ValueType_T, Tag_T >
37 |     operator()( simd_type< ValueType_T, Tag_T >, Function_T ){} // NOTE(review): empty body with a non-void return type - calling this generic version is undefined behaviour; presumably only specialisations are meant to be invoked (confirm)
38 | };
39 | 
40 | } // namespace litesimd
41 | 
42 | #endif // LITESIMD_ARCH_COMMON_INTRAVECTOR_H
43 | 


--------------------------------------------------------------------------------
/include/litesimd/algorithm.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_ALGORITHM_H
24 | #define LITESIMD_ALGORITHM_H
25 | 
26 | #include <litesimd/detail/arch/sse/algorithm.h>
27 | #include <litesimd/detail/arch/avx/algorithm.h>
28 | #include <litesimd/algorithm/for_each.h>
29 | #include <litesimd/algorithm/iota.h>
30 | #include <litesimd/algorithm/minmax.h>
31 | #include <litesimd/intravector.h>
32 | 
33 | /**
34 |  * \defgroup algorithm Algorithm functions
35 |  *
36 |  * Algorithm defines a collection of functions especially designed to be used
37 |  * on each value inside the packed SIMD register.
38 |  *
39 |  * All these functions are accessible at `<litesimd/algorithm.h>`
40 |  */
41 | 
42 | #endif // LITESIMD_ALGORITHM_H
43 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/common/arch.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_ARCH_COMMON_ARCH_H
24 | #define LITESIMD_ARCH_COMMON_ARCH_H
25 | 
26 | // Windows (_WIN32): SSE is always enabled; AVX is enabled when the compiler defines __AVX2__.
27 | #ifdef _WIN32
28 | #define LITESIMD_HAS_SSE
29 | 
30 | #ifdef __AVX2__
31 | #define LITESIMD_HAS_AVX
32 | #endif // __AVX2__
33 | 
34 | #endif // _WIN32
35 | 
36 | // GCC-compatible compilers (__GNUC__): x86-64 enables SSE, plus AVX with __AVX2__; ARMv7 enables NEON.
37 | #ifdef __GNUC__
38 | #ifdef __x86_64__
39 | #define LITESIMD_HAS_SSE
40 | 
41 | #ifdef __AVX2__
42 | #define LITESIMD_HAS_AVX
43 | #endif // __AVX2__
44 | 
45 | #endif // __x86_64__
46 | 
47 | #ifdef __ARM_ARCH_7__
48 | #define LITESIMD_HAS_NEON
49 | #endif // __ARM_ARCH_7__
50 | 
51 | #endif // __GNUC__
52 | 
53 | 
54 | #endif // LITESIMD_ARCH_COMMON_ARCH_H
55 | 


--------------------------------------------------------------------------------
/include/litesimd/helpers/containers.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_HELPERS_CONTAINERS_H
24 | #define LITESIMD_HELPERS_CONTAINERS_H
25 | 
26 | #include <vector>
27 | #include <string>
28 | #include <boost/align/aligned_allocator.hpp>
29 | 
30 | namespace litesimd {
31 | 
32 | /**
33 |  * \defgroup helpers Helper classes and compatibility
34 |  *
35 |  * Helpers define a group of type substitutions and `std` compatibility functions
36 |  * and classes.
37 |  *
38 |  * Each file must be included separately from the path `<litesimd/helpers/[file].h>`
39 |  */
40 | 
41 | /**
42 |  * \ingroup helpers
43 |  * \brief Aligned `std::vector`
44 |  *
45 |  * Alias for `std::vector` whose allocator is `boost::alignment::aligned_allocator`
46 |  * with an alignment argument of 64; otherwise a drop-in substitute for `std::vector`.
47 |  * Depends on `boost::align`.
48 |  */
49 | template< typename Val_T >
50 | using vector = std::vector< Val_T,
51 |                             boost::alignment::aligned_allocator< Val_T, 64 > >;
52 | /**
53 |  * \ingroup helpers
54 |  * \brief Aligned `std::string`
55 |  *
56 |  * Alias for `std::basic_string< char >` whose allocator is
57 |  * `boost::alignment::aligned_allocator< char, 64 >`; a drop-in substitute for `std::string`.
58 |  * Depends on `boost::align`.
59 |  */
60 | using string =
61 |     std::basic_string< char, std::char_traits< char >,
62 |                        boost::alignment::aligned_allocator< char, 64 > >;
63 | 
64 | } // namespace litesimd
65 | 
66 | #endif // LITESIMD_HELPERS_CONTAINERS_H
67 | 


--------------------------------------------------------------------------------
/include/litesimd/algorithm/detail/for_each_loop.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_ALGORITHM_DETAIL_FOREACHLOOP_H
24 | #define LITESIMD_ALGORITHM_DETAIL_FOREACHLOOP_H
25 | 
26 | namespace litesimd {
27 | namespace detail {
28 | // Compile-time recursion over the lane indices index, index-1, ..., 0 of a SIMD value.
29 | template< int index, typename SimdType_T, typename Function_T >
30 | struct for_each_loop
31 | {
32 |     bool operator()( bool forward, SimdType_T vec, Function_T func )
33 |     {
34 |         using type = typename SimdType_T::simd_value_type;
35 |         using tag = typename SimdType_T::simd_tag;
36 |         using lower_lanes = for_each_loop< index-1, SimdType_T, Function_T >;
37 | 
38 |         // forward == true: the lanes below `index` are visited first and this
39 |         // lane last. As soon as a callback yields false the remaining lanes are
40 |         // skipped and false is returned.
41 |         if( forward )
42 |         {
43 |             return lower_lanes()( true, vec, func )
44 |                 && func( index, get< index, type, tag >( vec ) );
45 |         }
46 | 
47 |         // forward == false: this lane is visited first, then the lanes below
48 |         // it, again stopping at the first callback that yields false.
49 |         return func( index, get< index, type, tag >( vec ) )
50 |             && lower_lanes()( false, vec, func );
51 |     }
52 | };
53 | // Recursion terminator: only lane 0 is visited and the direction flag is ignored.
54 | template< typename SimdType_T, typename Function_T >
55 | struct for_each_loop< 0, SimdType_T, Function_T >
56 | {
57 |     bool operator()( bool, SimdType_T vec, Function_T func )
58 |     {
59 |         typedef typename SimdType_T::simd_value_type type;
60 |         typedef typename SimdType_T::simd_tag tag;
61 | 
62 |         return func( 0, get< 0, type, tag >( vec ) );
63 |     }
64 | };
65 | } // namespace detail
66 | } // namespace litesimd
67 | 
68 | #endif // LITESIMD_ALGORITHM_DETAIL_FOREACHLOOP_H
69 | 


--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | project(litesimd_test)
 2 | 
 3 | find_package(Threads REQUIRED)
 4 | 
 5 | # ExternalProject supplies ExternalProject_Add, used below to fetch and build GoogleTest.
 6 | include(ExternalProject)
 7 | 
 8 | # Forward this project's C and C++ compilers to the GoogleTest build.
 9 | list(APPEND GTEST_CMAKE_ARGS
10 |     "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
11 |     "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
12 | )
13 | if(MSVC)
14 |     list(APPEND GTEST_CMAKE_ARGS "-DBUILD_SHARED_LIBS=ON")
15 | endif()
16 | # Download and build the GoogleTest 1.8.1 archive under gtest/; the install step is disabled.
17 | ExternalProject_Add(gtest
18 |     PREFIX ${CMAKE_CURRENT_BINARY_DIR}/gtest
19 |     URL https://github.com/google/googletest/archive/release-1.8.1.zip
20 |     CMAKE_ARGS ${GTEST_CMAKE_ARGS}
21 |     INSTALL_COMMAND ""
22 | )
23 | 
24 | # Get GTest source and binary directories from the external project
25 | ExternalProject_Get_Property(gtest source_dir binary_dir)
26 | 
27 | # Imported static library wrapping the archive built by the gtest external project
28 | add_library(libgtest STATIC IMPORTED GLOBAL)
29 | add_dependencies(libgtest gtest)
30 | 
31 | # Location of the built archive; the MSVC path is hard-wired to the Release configuration.
32 | if(MSVC)
33 |     set(libgtest_location "${binary_dir}/googlemock/gtest/Release/gtest.lib")
34 | else()
35 |     set(libgtest_location "${binary_dir}/googlemock/gtest/libgtest.a")
36 | endif()
37 | 
38 | # Threads::Threads (from find_package(Threads) above) is propagated to everything linking libgtest.
39 | set_target_properties(libgtest PROPERTIES
40 |     IMPORTED_LOCATION "${libgtest_location}"
41 |     INTERFACE_LINK_LIBRARIES Threads::Threads
42 | )
43 | 
44 | # Create a libgmock target to be used as a dependency by test programs
45 | #add_library(libgmock IMPORTED STATIC GLOBAL)
46 | #add_dependencies(libgmock gtest)
47 | #
48 | ## Set libgmock properties
49 | #set_target_properties(libgmock PROPERTIES
50 | #    "IMPORTED_LOCATION" "${binary_dir}/googlemock/libgmock.a"
51 | #    "IMPORTED_LINK_INTERFACE_LIBRARIES" "${CMAKE_THREAD_LIBS_INIT}"
52 | #)
53 | 
54 | cmake_policy(SET CMP0054 NEW)
55 | if(CMAKE_VERSION VERSION_LESS "3.10")
56 |     cmake_policy(SET CMP0057 NEW)
57 | endif()
58 | # CMake 3.9 and newer use the GoogleTest module; older versions fall back to find_package(GTest).
59 | if(NOT CMAKE_VERSION VERSION_LESS "3.9")
60 |     include(GoogleTest)
61 | else()
62 |     find_package(GTest)
63 | endif()
64 | 
65 | # Test project: every source file found in this directory is compiled into one executable.
66 | aux_source_directory(. SRC_LIST)
67 | add_executable(${PROJECT_NAME}
68 |     ${SRC_LIST}
69 | )
70 | 
71 | target_include_directories(${PROJECT_NAME}
72 |     SYSTEM PRIVATE
73 |     ${Boost_INCLUDE_DIRS}
74 |     "${source_dir}/googletest/include"
75 | )
76 | 
77 | target_link_libraries(${PROJECT_NAME} PRIVATE
78 |     litesimd
79 |     ${Boost_LIBRARIES}
80 |     libgtest
81 | )
82 | # Require C++11 outright: without CXX_STANDARD_REQUIRED the request may silently decay to an older standard.
83 | set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 11 CXX_STANDARD_REQUIRED ON)
84 | 
85 | if(NOT CMAKE_VERSION VERSION_LESS "3.10")
86 |     gtest_discover_tests(${PROJECT_NAME})
87 | elseif(NOT CMAKE_VERSION VERSION_LESS "3.9")
88 |     gtest_add_tests(TARGET ${PROJECT_NAME})
89 | else()
90 |     gtest_add_tests(${PROJECT_NAME} "" AUTO)
91 | endif()
92 | 


--------------------------------------------------------------------------------
/include/litesimd/algorithm/iota.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_ALGORITHM_IOTA_H
24 | #define LITESIMD_ALGORITHM_IOTA_H
25 | 
26 | #include <litesimd/arithmetic.h>
27 | 
28 | namespace litesimd {
29 | 
30 | /**
31 |  * \ingroup algorithm
32 |  * \brief Store increasing sequence on a SIMD register
33 |  *
34 |  * Assigns to every element in SIMD register successive values of _val_, as if incremented with `++val` after each element is written.
35 |  *
36 |  * \param val Start value (optional, default 0)
37 |  * \returns SIMD register with increasing values
38 |  *
39 |  * **Example**
40 |  * ```{.cpp}
41 |  * #include <iostream>
42 |  * #include <litesimd/types.h>
43 |  * #include <litesimd/algorithm.h>
44 |  * #include <litesimd/helpers/iostream.h>
45 |  *
46 |  * int main()
47 |  * {
48 |  *     namespace ls = litesimd;
49 |  *
50 |  *     std::cout << "iota< int32_t >(): " << ls::iota< int32_t >() << std::endl;
51 |  *     std::cout << "iota< int16_t >( 4 ): " << ls::iota< int16_t >( 4 ) << std::endl;
52 |  *     std::cout << "iota< double >( 1.2 ): " << ls::iota< double >( 1.2 ) << std::endl;
53 |  *     return 0;
54 |  * }
55 |  * ```
56 |  * Output on an SSE compilation
57 |  * ```
58 |  * iota< int32_t >(): (3, 2, 1, 0)
59 |  * iota< int16_t >( 4 ): (11, 10, 9, 8, 7, 6, 5, 4)
60 |  * iota< double >( 1.2 ): (2.2, 1.2)
61 |  * ```
62 |  */
63 | template< typename ValueType_T, typename Tag_T = default_tag >
64 | inline simd_type< ValueType_T, Tag_T > iota( ValueType_T val )
65 | {
66 |     // Add the start value to the sequence produced by simd_type::iota().
67 |     using vec_type = simd_type< ValueType_T, Tag_T >;
68 |     return add( val, vec_type::iota() );
69 | }
70 | // Overload without a start value: returns simd_type::iota() unchanged.
71 | template< typename ValueType_T, typename Tag_T = default_tag >
72 | inline simd_type< ValueType_T, Tag_T > iota()
73 | {
74 |     return simd_type< ValueType_T, Tag_T >::iota();
75 | }
76 | 
77 | } // namespace litesimd
78 | 
79 | #endif // LITESIMD_ALGORITHM_IOTA_H
80 | 


--------------------------------------------------------------------------------
/include/litesimd/helpers/iostream.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_HELPERS_IOSTREAM_H
24 | #define LITESIMD_HELPERS_IOSTREAM_H
25 | 
26 | #include <iostream>
27 | #include <iomanip>
28 | #include <type_traits>
29 | #include <litesimd/types.h>
30 | #include <litesimd/shuffle.h>
31 | #include <litesimd/algorithm.h>
32 | 
33 | namespace litesimd {
34 | 
35 | /**
36 |  * \ingroup helpers
37 |  * \brief Compatibility version of `operator<<` to output an integral `simd_type` on `ostream`.
38 |  * Lanes are written inside parentheses, separated by ", ", each as the unsigned value of its full-width bit pattern.
39 |  * \param out Output stream
40 |  * \param vec simd_type to print to stream
41 |  * \return The output stream
42 |  */
43 | template< typename SimdType_T,
44 |           typename std::enable_if<std::is_integral<typename SimdType_T::simd_value_type>::value>::type* = nullptr >
45 | inline std::ostream& operator<<( std::ostream& out, SimdType_T vec )
46 | {
47 |     std::ios_base::fmtflags f( out.flags() );
48 |     // The flags captured above are restored before returning.
49 |     out << "(";
50 |     for_each_backward( vec, [&out]( int index, typename SimdType_T::simd_value_type val ) -> bool
51 |     {
52 |         using lane_bits = typename std::make_unsigned< typename SimdType_T::simd_value_type >::type;
53 |         out << +static_cast< lane_bits >( val ); // unary + promotes 8-bit lanes so they print as numbers, not characters
54 |         if( index > 0 )
55 |             out << ", ";
56 |         return true;
57 |     } );
58 |     out << ")";
59 | 
60 |     out.flags( f );
61 | 
62 |     return out;
63 | }
64 | // Floating-point overload: writes the lanes inside parentheses, separated by ", ", then restores the stream flags.
65 | template< typename SimdType_T,
66 |           typename std::enable_if<std::is_floating_point<typename SimdType_T::simd_value_type>::value>::type* = nullptr >
67 | inline std::ostream& operator<<( std::ostream& out, SimdType_T vec )
68 | {
69 |     using lane_type = typename SimdType_T::simd_value_type;
70 |     const std::ios_base::fmtflags saved_flags = out.flags();
71 |     out << "(";
72 |     // Lanes are visited through for_each_backward; every lane except index 0 is followed by ", ".
73 |     for_each_backward( vec, [&out]( int index, lane_type val ) -> bool
74 |     {
75 |         out << val;
76 |         if( 0 < index )
77 |             out << ", ";
78 |         return true;
79 |     } );
80 |     out << ")";
81 | 
82 |     out.flags( saved_flags );
83 |     return out;
84 | }
85 | 
86 | } // namespace litesimd
87 | 
88 | #endif // LITESIMD_HELPERS_IOSTREAM_H
89 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/sse/algorithm.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_ARCH_SSE_ALGORITHM_H
24 | #define LITESIMD_ARCH_SSE_ALGORITHM_H
25 | #include <litesimd/detail/arch/common/arch.h> // defines LITESIMD_HAS_SSE; must come before the check below
26 | #ifdef LITESIMD_HAS_SSE
27 | 
28 | #include <litesimd/detail/arch/common/algorithm.h>
29 | #include <litesimd/compare.h>
30 | #include <litesimd/shuffle.h>
31 | 
32 | namespace litesimd {
33 | 
34 | // Min max
35 | // ---------------------------------------------------------------------------------------
36 | #define DEF_BINARY_MIN( TYPE_T, CMD ) \
37 | template<> inline simd_type< TYPE_T, sse_tag > \
38 | min< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \
39 |                         simd_type< TYPE_T, sse_tag > rhs ) { \
40 |     return CMD( lhs, rhs ); }
41 | // Each line below specialises min<> for one lane type by forwarding to a single intrinsic.
42 | DEF_BINARY_MIN( int8_t,  _mm_min_epi8 )
43 | DEF_BINARY_MIN( int16_t, _mm_min_epi16 )
44 | DEF_BINARY_MIN( int32_t, _mm_min_epi32 )
45 | DEF_BINARY_MIN( float,   _mm_min_ps )
46 | DEF_BINARY_MIN( double,  _mm_min_pd )
47 | #undef DEF_BINARY_MIN
48 | 
49 | #define DEF_BINARY_MAX( TYPE_T, CMD ) \
50 | template<> inline simd_type< TYPE_T, sse_tag > \
51 | max< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \
52 |                         simd_type< TYPE_T, sse_tag > rhs ) { \
53 |     return CMD( lhs, rhs ); }
54 | // Same pattern for max<>.
55 | DEF_BINARY_MAX( int8_t,  _mm_max_epi8 )
56 | DEF_BINARY_MAX( int16_t, _mm_max_epi16 )
57 | DEF_BINARY_MAX( int32_t, _mm_max_epi32 )
58 | DEF_BINARY_MAX( float,   _mm_max_ps )
59 | DEF_BINARY_MAX( double,  _mm_max_pd )
60 | #undef DEF_BINARY_MAX
61 | // int64_t has no intrinsic mapped above, so min/max are composed from greater() and blend().
62 | template<> inline simd_type< int64_t, sse_tag >
63 | min< int64_t, sse_tag >( simd_type< int64_t, sse_tag > lhs, simd_type< int64_t, sse_tag > rhs )
64 | {
65 |     auto mask = greater< int64_t, sse_tag >( lhs, rhs );
66 |     return blend< int64_t, sse_tag >( mask, rhs, lhs );
67 | }
68 | 
69 | template<> inline simd_type< int64_t, sse_tag >
70 | max< int64_t, sse_tag >( simd_type< int64_t, sse_tag > lhs, simd_type< int64_t, sse_tag > rhs )
71 | {
72 |     auto mask = greater< int64_t, sse_tag >( lhs, rhs );
73 |     return blend< int64_t, sse_tag >( mask, lhs, rhs );
74 | }
75 | 
76 | } // namespace litesimd
77 | 
78 | #endif // LITESIMD_HAS_SSE
79 | #endif // LITESIMD_ARCH_SSE_ALGORITHM_H
80 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/avx/algorithm.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_ARCH_AVX_ALGORITHM_H
24 | #define LITESIMD_ARCH_AVX_ALGORITHM_H
25 | 
26 | #ifdef LITESIMD_HAS_AVX
27 | 
28 | #include <litesimd/detail/arch/common/algorithm.h>
29 | #include <litesimd/compare.h>
30 | #include <litesimd/shuffle.h>
31 | 
32 | namespace litesimd {
33 | 
34 | // Min max
35 | // ---------------------------------------------------------------------------------------
36 | // Both functions share one generator: FUNC is the litesimd function being specialized
37 | // (min or max) and CMD is the intrinsic that the specialization forwards lhs and rhs to.
38 | #define DEF_BINARY_MINMAX( FUNC, TYPE_T, CMD ) \
39 | template<> inline simd_type< TYPE_T, avx_tag > \
40 | FUNC< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \
41 |                          simd_type< TYPE_T, avx_tag > rhs ) \
42 | { \
43 |     return CMD( lhs, rhs ); \
44 | }
45 | 
46 | DEF_BINARY_MINMAX( min, int8_t,  _mm256_min_epi8 )
47 | DEF_BINARY_MINMAX( min, int16_t, _mm256_min_epi16 )
48 | DEF_BINARY_MINMAX( min, int32_t, _mm256_min_epi32 )
49 | DEF_BINARY_MINMAX( min, float,   _mm256_min_ps )
50 | DEF_BINARY_MINMAX( min, double,  _mm256_min_pd )
51 | 
52 | DEF_BINARY_MINMAX( max, int8_t,  _mm256_max_epi8 )
53 | DEF_BINARY_MINMAX( max, int16_t, _mm256_max_epi16 )
54 | DEF_BINARY_MINMAX( max, int32_t, _mm256_max_epi32 )
55 | DEF_BINARY_MINMAX( max, float,   _mm256_max_ps )
56 | DEF_BINARY_MINMAX( max, double,  _mm256_max_pd )
57 | #undef DEF_BINARY_MINMAX
58 | 
59 | // int64_t does not forward to a single intrinsic: min and max are each composed from
60 | // greater() and blend(), differing only in the order of the two blended operands.
61 | // NOTE(review): which operand blend() picks for set mask lanes is not visible here - confirm.
62 | template<> inline simd_type< int64_t, avx_tag >
63 | min< int64_t, avx_tag >( simd_type< int64_t, avx_tag > lhs, simd_type< int64_t, avx_tag > rhs )
64 | {
65 |     auto lhs_is_greater = greater< int64_t, avx_tag >( lhs, rhs );
66 |     return blend< int64_t, avx_tag >( lhs_is_greater, rhs, lhs );
67 | }
68 | 
69 | template<> inline simd_type< int64_t, avx_tag >
70 | max< int64_t, avx_tag >( simd_type< int64_t, avx_tag > lhs, simd_type< int64_t, avx_tag > rhs )
71 | {
72 |     auto lhs_is_greater = greater< int64_t, avx_tag >( lhs, rhs );
73 |     return blend< int64_t, avx_tag >( lhs_is_greater, lhs, rhs );
74 | }
75 | 
76 | } // namespace litesimd
77 | 
78 | #endif // LITESIMD_HAS_AVX
79 | #endif // LITESIMD_ARCH_AVX_ALGORITHM_H
80 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/sse/bitwise.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_SSE_BITWISE_H
24 | #define LITESIMD_SSE_BITWISE_H
25 | 
26 | #ifdef LITESIMD_HAS_SSE
27 | 
28 | #include <litesimd/types.h>
29 | #include <litesimd/detail/arch/common/bitwise.h>
30 | 
31 | namespace litesimd {
32 | 
33 | // Bit AND: one specialization per element type; all integer widths share _mm_and_si128.
34 | #define DEF_BIT_AND( ELEM_T, INTRIN ) \
35 | template<> inline simd_type< ELEM_T, sse_tag > \
36 | bit_and< ELEM_T, sse_tag >( simd_type< ELEM_T, sse_tag > lhs, \
37 |                             simd_type< ELEM_T, sse_tag > rhs ) \
38 | { \
39 |     return INTRIN( lhs, rhs ); \
40 | }
41 | DEF_BIT_AND( int8_t,  _mm_and_si128 )
42 | DEF_BIT_AND( int16_t, _mm_and_si128 )
43 | DEF_BIT_AND( int32_t, _mm_and_si128 )
44 | DEF_BIT_AND( int64_t, _mm_and_si128 )
45 | DEF_BIT_AND( float,   _mm_and_ps )
46 | DEF_BIT_AND( double,  _mm_and_pd )
47 | #undef DEF_BIT_AND
48 | 
49 | // Bit OR: one specialization per element type; all integer widths share _mm_or_si128.
50 | #define DEF_BIT_OR( ELEM_T, INTRIN ) \
51 | template<> inline simd_type< ELEM_T, sse_tag > \
52 | bit_or< ELEM_T, sse_tag >( simd_type< ELEM_T, sse_tag > lhs, \
53 |                            simd_type< ELEM_T, sse_tag > rhs ) \
54 | { \
55 |     return INTRIN( lhs, rhs ); \
56 | }
57 | DEF_BIT_OR( int8_t,  _mm_or_si128 )
58 | DEF_BIT_OR( int16_t, _mm_or_si128 )
59 | DEF_BIT_OR( int32_t, _mm_or_si128 )
60 | DEF_BIT_OR( int64_t, _mm_or_si128 )
61 | DEF_BIT_OR( float,   _mm_or_ps )
62 | DEF_BIT_OR( double,  _mm_or_pd )
63 | #undef DEF_BIT_OR
64 | 
65 | // Bit XOR: one specialization per element type; all integer widths share _mm_xor_si128.
66 | #define DEF_BIT_XOR( ELEM_T, INTRIN ) \
67 | template<> inline simd_type< ELEM_T, sse_tag > \
68 | bit_xor< ELEM_T, sse_tag >( simd_type< ELEM_T, sse_tag > lhs, \
69 |                             simd_type< ELEM_T, sse_tag > rhs ) \
70 | { \
71 |     return INTRIN( lhs, rhs ); \
72 | }
73 | DEF_BIT_XOR( int8_t,  _mm_xor_si128 )
74 | DEF_BIT_XOR( int16_t, _mm_xor_si128 )
75 | DEF_BIT_XOR( int32_t, _mm_xor_si128 )
76 | DEF_BIT_XOR( int64_t, _mm_xor_si128 )
77 | DEF_BIT_XOR( float,   _mm_xor_ps )
78 | DEF_BIT_XOR( double,  _mm_xor_pd )
79 | #undef DEF_BIT_XOR
80 | 
81 | } // namespace litesimd
82 | 
83 | #endif // LITESIMD_HAS_SSE
84 | #endif // LITESIMD_SSE_BITWISE_H
85 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/avx/bitwise.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_AVX_BITWISE_H
24 | #define LITESIMD_AVX_BITWISE_H
25 | 
26 | #ifdef LITESIMD_HAS_AVX
27 | 
28 | #include <litesimd/types.h>
29 | #include <litesimd/detail/arch/common/bitwise.h>
30 | 
31 | namespace litesimd {
32 | 
33 | // Bit AND: one specialization per element type; all integer widths share _mm256_and_si256.
34 | #define DEF_BIT_AND( ELEM_T, INTRIN ) \
35 | template<> inline simd_type< ELEM_T, avx_tag > \
36 | bit_and< ELEM_T, avx_tag >( simd_type< ELEM_T, avx_tag > lhs, \
37 |                             simd_type< ELEM_T, avx_tag > rhs ) \
38 | { \
39 |     return INTRIN( lhs, rhs ); \
40 | }
41 | DEF_BIT_AND( int8_t,  _mm256_and_si256 )
42 | DEF_BIT_AND( int16_t, _mm256_and_si256 )
43 | DEF_BIT_AND( int32_t, _mm256_and_si256 )
44 | DEF_BIT_AND( int64_t, _mm256_and_si256 )
45 | DEF_BIT_AND( float,   _mm256_and_ps )
46 | DEF_BIT_AND( double,  _mm256_and_pd )
47 | #undef DEF_BIT_AND
48 | 
49 | // Bit OR: one specialization per element type; all integer widths share _mm256_or_si256.
50 | #define DEF_BIT_OR( ELEM_T, INTRIN ) \
51 | template<> inline simd_type< ELEM_T, avx_tag > \
52 | bit_or< ELEM_T, avx_tag >( simd_type< ELEM_T, avx_tag > lhs, \
53 |                            simd_type< ELEM_T, avx_tag > rhs ) \
54 | { \
55 |     return INTRIN( lhs, rhs ); \
56 | }
57 | DEF_BIT_OR( int8_t,  _mm256_or_si256 )
58 | DEF_BIT_OR( int16_t, _mm256_or_si256 )
59 | DEF_BIT_OR( int32_t, _mm256_or_si256 )
60 | DEF_BIT_OR( int64_t, _mm256_or_si256 )
61 | DEF_BIT_OR( float,   _mm256_or_ps )
62 | DEF_BIT_OR( double,  _mm256_or_pd )
63 | #undef DEF_BIT_OR
64 | 
65 | // Bit XOR: one specialization per element type; all integer widths share _mm256_xor_si256.
66 | #define DEF_BIT_XOR( ELEM_T, INTRIN ) \
67 | template<> inline simd_type< ELEM_T, avx_tag > \
68 | bit_xor< ELEM_T, avx_tag >( simd_type< ELEM_T, avx_tag > lhs, \
69 |                             simd_type< ELEM_T, avx_tag > rhs ) \
70 | { \
71 |     return INTRIN( lhs, rhs ); \
72 | }
73 | DEF_BIT_XOR( int8_t,  _mm256_xor_si256 )
74 | DEF_BIT_XOR( int16_t, _mm256_xor_si256 )
75 | DEF_BIT_XOR( int32_t, _mm256_xor_si256 )
76 | DEF_BIT_XOR( int64_t, _mm256_xor_si256 )
77 | DEF_BIT_XOR( float,   _mm256_xor_ps )
78 | DEF_BIT_XOR( double,  _mm256_xor_pd )
79 | #undef DEF_BIT_XOR
80 | 
81 | } // namespace litesimd
82 | 
83 | #endif // LITESIMD_HAS_AVX
84 | #endif // LITESIMD_AVX_BITWISE_H
85 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/helper_macros.h:
--------------------------------------------------------------------------------
 1 | // MIT License
 2 | //
 3 | // Copyright (c) 2018 André Tupinambá
 4 | //
 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | // of this software and associated documentation files (the "Software"), to deal
 7 | // in the Software without restriction, including without limitation the rights
 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | // copies of the Software, and to permit persons to whom the Software is
10 | // furnished to do so, subject to the following conditions:
11 | //
12 | // The above copyright notice and this permission notice shall be included in all
13 | // copies or substantial portions of the Software.
14 | //
15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | // SOFTWARE.
22 | 
23 | #ifndef LITESIMD_DETAIL_HELPER_MACROS_H
24 | #define LITESIMD_DETAIL_HELPER_MACROS_H
25 | 
26 | // SIMD Functions
27 | // ---------------------------------------------------------------------------------------
28 | // Each macro emits the explicit specialization NAME< TYPE_T, TAG_T > that forwards its
29 | // argument(s) to CMD; RET_T and PARAM_T name member types of simd_type< TYPE_T, TAG_T >.
30 | #define DEFINE_UNARY_SIMD_FUNCTION( TYPE_T, TAG_T, RET_T, PARAM_T, NAME, CMD ) \
31 | template<> inline typename simd_type< TYPE_T, TAG_T >::RET_T \
32 | NAME< TYPE_T, TAG_T >( typename simd_type< TYPE_T, TAG_T >::PARAM_T arg0 ) { return CMD( arg0 ); }
33 | 
34 | #define DEFINE_BINARY_SIMD_FUNCTION( TYPE_T, TAG_T, RET_T, PARAM_T, NAME, CMD ) \
35 | template<> inline typename simd_type< TYPE_T, TAG_T >::RET_T \
36 | NAME< TYPE_T, TAG_T >( typename simd_type< TYPE_T, TAG_T >::PARAM_T arg0, \
37 |                        typename simd_type< TYPE_T, TAG_T >::PARAM_T arg1 ) { return CMD( arg0, arg1 ); }
38 | 
39 | // Binary function adaptors
40 | // ---------------------------------------------------------------------------------------
41 | // Convenience overloads that forward to NAME< value type, tag >( simd, simd ); RET_T names the
42 | // simd_type<> member used as return type. SIMD_TYPE takes both template arguments from the
43 | // register type; LEFT_VALUE / RIGHT_VALUE first construct a simd_type from the plain value.
44 | #define DEFINE_BINARY_FUNCTION_SIMD_TYPE_ADAPTOR( NAME, RET_T ) \
45 | template< typename Simd_T, typename Simd_T::simd_value_type* = nullptr > \
46 | inline typename simd_type< typename Simd_T::simd_value_type, typename Simd_T::simd_tag >::RET_T \
47 | NAME( Simd_T lhs, Simd_T rhs ) { \
48 |     return NAME< typename Simd_T::simd_value_type, typename Simd_T::simd_tag >( lhs, rhs ); }
49 | 
50 | #define DEFINE_BINARY_FUNCTION_LEFT_VALUE_ADAPTOR( NAME, RET_T ) \
51 | template< typename Value_T, typename Tag_T = default_tag > \
52 | inline typename simd_type< Value_T, Tag_T >::RET_T \
53 | NAME( Value_T value, simd_type< Value_T, Tag_T > rhs ) { \
54 |     return NAME< Value_T, Tag_T >( simd_type< Value_T, Tag_T >( value ), rhs ); }
55 | 
56 | #define DEFINE_BINARY_FUNCTION_RIGHT_VALUE_ADAPTOR( NAME, RET_T ) \
57 | template< typename Value_T, typename Tag_T = default_tag > \
58 | inline typename simd_type< Value_T, Tag_T >::RET_T \
59 | NAME( simd_type< Value_T, Tag_T > lhs, Value_T value ) { \
60 |     return NAME< Value_T, Tag_T >( lhs, simd_type< Value_T, Tag_T >( value ) ); }
61 | 
62 | #define DEFINE_BINARY_FUNCTION_ADAPTORS( NAME, RET_T ) \
63 | DEFINE_BINARY_FUNCTION_SIMD_TYPE_ADAPTOR( NAME, RET_T ) \
64 | DEFINE_BINARY_FUNCTION_LEFT_VALUE_ADAPTOR( NAME, RET_T ) \
65 | DEFINE_BINARY_FUNCTION_RIGHT_VALUE_ADAPTOR( NAME, RET_T )
66 | 
67 | #endif // LITESIMD_DETAIL_HELPER_MACROS_H
68 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
  1 | dist: trusty
  2 | language: cpp
  3 | sudo: false
  4 | 
  5 | common_sources: &all_sources  # apt source list shared by every Linux job below via *all_sources
  6 |   - ubuntu-toolchain-r-test
  7 |   - llvm-toolchain-trusty
  8 |   - llvm-toolchain-trusty-3.9
  9 |   - llvm-toolchain-trusty-4.0
 10 |   - llvm-toolchain-trusty-5.0
 11 |   - llvm-toolchain-trusty-6.0
 12 |   - sourceline: 'ppa:mhier/libboost-latest'
 13 | 
 14 | matrix:  # explicit job list; each job's MYCC / MYCXX are exported as CC / CXX in the script section
 15 |   exclude: # On OSX g++ is a symlink to clang++ by default
 16 |     - os: osx
 17 |       compiler: gcc
 18 | 
 19 |   include:
 20 |     - os: linux  # GCC 7
 21 |       compiler: gcc
 22 |       addons:
 23 |         apt:
 24 |           sources: *all_sources
 25 |           packages: ['gcc-7', 'g++-7', 'cmake', 'boost1.67']
 26 |       env: MYCC='gcc-7' MYCXX='g++-7'
 27 | 
 28 |     - os: linux  # GCC 6
 29 |       compiler: gcc
 30 |       addons:
 31 |         apt:
 32 |           sources: *all_sources
 33 |           packages: ['gcc-6', 'g++-6', 'cmake', 'boost1.67']
 34 |       env: MYCC='gcc-6' MYCXX='g++-6'
 35 | 
 36 |     - os: linux  # GCC 5
 37 |       compiler: gcc
 38 |       addons:
 39 |         apt:
 40 |           sources: *all_sources
 41 |           packages: ['gcc-5', 'g++-5', 'cmake', 'boost1.67']
 42 |       env: MYCC='gcc-5' MYCXX='g++-5'
 43 | 
 44 |     - os: linux  # GCC 4.9
 45 |       compiler: gcc
 46 |       addons:
 47 |         apt:
 48 |           sources: *all_sources
 49 |           packages: ['gcc-4.9', 'g++-4.9', 'cmake', 'boost1.67']
 50 |       env: MYCC='gcc-4.9' MYCXX='g++-4.9'
 51 | 
 52 |     - os: linux  # GCC 4.8
 53 |       compiler: gcc
 54 |       addons:
 55 |         apt:
 56 |           sources: *all_sources
 57 |           packages: ['gcc-4.8', 'g++-4.8', 'cmake', 'boost1.67']
 58 |       env: MYCC='gcc-4.8' MYCXX='g++-4.8'
 59 | 
 60 |     - os: linux  # Clang 6.0
 61 |       compiler: clang
 62 |       addons:
 63 |         apt:
 64 |           sources: *all_sources
 65 |           packages: ['clang-6.0', 'libstdc++-6-dev', 'cmake', 'boost1.67']
 66 |       env: MYCC='clang-6.0' MYCXX='clang++-6.0'
 67 | 
 68 |     - os: linux  # Clang 5.0
 69 |       compiler: clang
 70 |       addons:
 71 |         apt:
 72 |           sources: *all_sources
 73 |           packages: ['clang-5.0', 'libstdc++-5-dev', 'cmake', 'boost1.67']
 74 |       env: MYCC='clang-5.0' MYCXX='clang++-5.0'
 75 | 
 76 |     - os: linux  # Clang 4.0
 77 |       compiler: clang
 78 |       addons:
 79 |         apt:
 80 |           sources: *all_sources
 81 |           packages: ['clang-4.0', 'libstdc++-4.8-dev', 'cmake', 'boost1.67']
 82 |       env: MYCC='clang-4.0' MYCXX='clang++-4.0'
 83 | 
 84 |     - os: osx  # Xcode 9.4
 85 |       osx_image: xcode9.4
 86 |       compiler: clang
 87 |       env: MYCC='clang' MYCXX='clang++'
 88 | 
 89 |     - os: osx  # Xcode 9.3
 90 |       osx_image: xcode9.3
 91 |       compiler: clang
 92 |       env: MYCC='clang' MYCXX='clang++'
 93 | 
 94 |     - os: osx  # Xcode 9.2
 95 |       osx_image: xcode9.2
 96 |       compiler: clang
 97 |       env: MYCC='clang' MYCXX='clang++'
 98 | 
 99 |     - os: osx  # Xcode 9.1
100 |       osx_image: xcode9.1
101 |       compiler: clang
102 |       env: MYCC='clang' MYCXX='clang++'
103 | 
104 |     - os: osx  # Xcode 9
105 |       osx_image: xcode9
106 |       compiler: clang
107 |       env: MYCC='clang' MYCXX='clang++'
108 | 
109 |     - os: osx  # Xcode 8.3
110 |       osx_image: xcode8.3
111 |       compiler: clang
112 |       env: MYCC='clang' MYCXX='clang++'
113 | 
114 | before_script:  # Linux only: unpack CMake 3.12.2 into ./cmake and put its bin/ first on PATH
115 |   - |  # NOTE(review): wget runs with --no-check-certificate, so the download is not TLS-verified
116 |     if [[ "${TRAVIS_OS_NAME}" == "linux" ]]; then
117 |       CMAKE_URL="https://cmake.org/files/v3.12/cmake-3.12.2-Linux-x86_64.tar.gz"
118 |       mkdir cmake && travis_retry wget --no-check-certificate --quiet -O - ${CMAKE_URL} | tar --strip-components=1 -xz -C cmake
119 |       export PATH=${PWD}/cmake/bin:${PATH}
120 |     fi
121 | 
122 | script:  # select the job's compiler, print tool versions, then configure, build and run ctest
123 |   - export CC=${MYCC}
124 |   - export CXX=${MYCXX}
125 |   - cmake --version
126 |   - ${CXX} -v
127 |   - (mkdir build && cd build && cmake -DCMAKE_BUILD_TYPE=Release .. && cmake --build . && ctest -V .)  # out-of-source Release build
128 | 


--------------------------------------------------------------------------------
/include/litesimd/intravector.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_INTRAVECTOR_H
 24 | #define LITESIMD_INTRAVECTOR_H
 25 | 
 26 | #include <litesimd/detail/arch/intravector.h>
 27 | 
 28 | namespace litesimd {
 29 | 
 30 | /**
 31 |  * \defgroup intravector Intravector operations
 32 |  *
 33 |  * In litesimd, the intravector group has functions which operate between
 34 |  * the values of one SIMD register.
 35 |  *
 36 |  * All these functions are accessible through `<litesimd/intravector.h>`
 37 |  */
 38 | 
 39 | /**
 40 |  * \ingroup intravector
 41 |  * \brief Apply a generic SIMD binary function to reduce all SIMD values to a single one.
 42 |  *
 43 |  * The SIMD binary function should receive two simd_type values and return the same type.
 44 |  * Lambda functions can be used as well.
 45 |  *
 46 |  * ```{.cpp}
 47 |  * ls::t_int32_simd vec( 1 );
 48 |  * ls::horizontal( vec, []( ls::t_int32_simd x, ls::t_int32_simd y )
 49 |  * {
 50 |  *     return (x ^ y) | 1;
 51 |  * } );
 52 |  * ```
 53 |  *
 54 |  * \param vec SIMD register to be reduced
 55 |  * \param func SIMD binary function
 56 |  * \tparam ValueType_T Base type of original SIMD register
 57 |  * \tparam Function_T Binary function type
 58 |  * \tparam Tag_T Tag of the SIMD register (defaults to default_tag)
 59 |  * \returns The result of reduction
 60 |  *
 61 |  * **Example**
 62 |  * ```{.cpp}
 63 |  * #include <iostream>
 64 |  * #include <litesimd/types.h>
 65 |  * #include <litesimd/arithmetic.h>
 66 |  * #include <litesimd/intravector.h>
 67 |  *
 68 |  * int main()
 69 |  * {
 70 |  *     namespace ls = litesimd;
 71 |  *
 72 |  *     using func_t = ls::t_int32_simd(*)(ls::t_int32_simd, ls::t_int32_simd);
 73 |  *
 74 |  *     ls::t_int32_simd x( 1, 2, 3, 4 );
 75 |  *     std::cout << "horizontal( x, add ): "
 76 |  *               << ls::horizontal( x, static_cast< func_t >(ls::add< int32_t >) )
 77 |  *               << std::endl;
 78 |  *     return 0;
 79 |  * }
 80 |  * ```
 81 |  * Output on an SSE compilation
 82 |  * ```
 83 |  * horizontal( x, add ): 10
 84 |  * ```
 85 |  */
 86 | template< typename ValueType_T, typename Function_T, typename Tag_T = default_tag >
 87 | inline ValueType_T horizontal( simd_type< ValueType_T, Tag_T > vec, Function_T func )
 88 | {
 89 |     using reducer_t = intravector_op< ValueType_T, Tag_T >;
 90 |     return reducer_t()( vec, func );
 91 | }
 92 | 
 93 | /**
 94 |  * \ingroup intravector
 95 |  * \brief Overload that takes the value type and tag from the SIMD register type itself.
 96 |  *
 97 |  * \param vec SIMD register to be reduced
 98 |  * \param func SIMD binary function
 99 |  * \returns The result of reduction
100 |  */
101 | template< typename SimdType_T, typename Function_T,
102 |           typename SimdType_T::simd_value_type* = nullptr >
103 | inline typename SimdType_T::simd_value_type
104 | horizontal( SimdType_T vec, Function_T func )
105 | {
106 |     using value_t = typename SimdType_T::simd_value_type;
107 |     using tag_t = typename SimdType_T::simd_tag;
108 |     return horizontal< value_t, Function_T, tag_t >( vec, func );
109 | }
110 | 
111 | } // namespace litesimd
112 | 
113 | #endif // LITESIMD_INTRAVECTOR_H
114 | 


--------------------------------------------------------------------------------
/include/litesimd/arithmetic.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_ARITHMETIC_H
 24 | #define LITESIMD_ARITHMETIC_H
 25 | 
 26 | #include <litesimd/types.h>
 27 | #include <litesimd/detail/arch/arithmetic.h>
 28 | #include <litesimd/detail/helper_macros.h>
 29 | 
 30 | namespace litesimd {
 31 | 
 32 | /**
 33 |  * \defgroup arithmetic Arithmetic operations
 34 |  *
 35 |  * In litesimd, the arithmetic group has binary functions to execute the basic arithmetic
 36 |  * operations (addition, subtraction, multiplication and division).
 37 |  *
 38 |  * All these functions are accessible through `<litesimd/arithmetic.h>`
 39 |  */
 40 | 
 41 | // Basic operations
 42 | // ---------------------------------------------------------------------------------------
 43 | // Each line adds the simd/simd, value/simd and simd/value overloads of the named function;
 44 | // `type` is the simd_type<> member those overloads use as their return type.
 45 | DEFINE_BINARY_FUNCTION_ADAPTORS( add, type )
 46 | DEFINE_BINARY_FUNCTION_ADAPTORS( sub, type )
 47 | DEFINE_BINARY_FUNCTION_ADAPTORS( mullo, type )
 48 | DEFINE_BINARY_FUNCTION_ADAPTORS( mulhi, type )
 49 | DEFINE_BINARY_FUNCTION_ADAPTORS( div, type )
 50 | 
 51 | /**
 52 |  * \ingroup arithmetic
 53 |  * \brief Returns the sum of two values.
 54 |  *
 55 |  * Takes part in overload resolution only when `add( lhs, rhs )` is well-formed.
 56 |  *
 57 |  * \param lhs, rhs Values to be added.
 58 |  * \returns Sum of the two values.
 59 |  *
 60 |  * **Example**
 61 |  * ```{.cpp}
 62 |  * #include <iostream>
 63 |  * #include <litesimd/types.h>
 64 |  * #include <litesimd/arithmetic.h>
 65 |  * #include <litesimd/helpers/iostream.h>
 66 |  *
 67 |  * int main()
 68 |  * {
 69 |  *     namespace ls = litesimd;
 70 |  *
 71 |  *     ls::t_int32_simd a( 1 ), b( 10 );
 72 |  *     std::cout << "a + b: " << a + b << std::endl;
 73 |  *     return 0;
 74 |  * }
 75 |  * ```
 76 |  * Output on an SSE compilation
 77 |  * ```
 78 |  * a + b: (11, 11, 11, 11)
 79 |  * ```
 80 |  */
 81 | template< typename Lhs_T, typename Rhs_T >
 82 | inline auto operator+( Lhs_T lhs, Rhs_T rhs )
 83 |     -> decltype( add( lhs, rhs ) )
 84 | {
 85 |     return add( lhs, rhs );
 86 | }
 87 | 
 88 | /**
 89 |  * \ingroup arithmetic
 90 |  * \brief Returns the subtraction of two values.
 91 |  *
 92 |  * Takes part in overload resolution only when `sub( lhs, rhs )` is well-formed.
 93 |  *
 94 |  * \param lhs, rhs Values to be subtracted.
 95 |  * \returns Difference of the two values.
 96 |  *
 97 |  * **Example**
 98 |  * ```{.cpp}
 99 |  * #include <iostream>
100 |  * #include <litesimd/types.h>
101 |  * #include <litesimd/arithmetic.h>
102 |  * #include <litesimd/helpers/iostream.h>
103 |  *
104 |  * int main()
105 |  * {
106 |  *     namespace ls = litesimd;
107 |  *
108 |  *     ls::t_int32_simd a( 10 ), b( 1 );
109 |  *     std::cout << "a - b: " << a - b << std::endl;
110 |  *     return 0;
111 |  * }
112 |  * ```
113 |  * Output on an SSE compilation
114 |  * ```
115 |  * a - b: (9, 9, 9, 9)
116 |  * ```
117 |  */
118 | template< typename Lhs_T, typename Rhs_T >
119 | inline auto operator-( Lhs_T lhs, Rhs_T rhs )
120 |     -> decltype( sub( lhs, rhs ) )
121 | {
122 |     return sub( lhs, rhs );
123 | }
124 | 
125 | } // namespace litesimd
126 | 
127 | #endif // LITESIMD_ARITHMETIC_H
128 | 
129 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/sse/arithmetic.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_SSE_ARITHMETIC_H
 24 | #define LITESIMD_SSE_ARITHMETIC_H
 25 | 
 26 | #ifdef LITESIMD_HAS_SSE
 27 | 
 28 | #include <litesimd/types.h>
 29 | #include <litesimd/detail/arch/common/arithmetic.h>
 30 | 
 31 | namespace litesimd {
 32 | 
 33 | // Add: one explicit specialization per element type, each forwarding to an _mm_add_* intrinsic.
 34 | #define DEF_ADD( ELEM_T, INTRIN ) \
 35 | template<> inline simd_type< ELEM_T, sse_tag > \
 36 | add< ELEM_T, sse_tag >( simd_type< ELEM_T, sse_tag > lhs, \
 37 |                         simd_type< ELEM_T, sse_tag > rhs ) \
 38 | { \
 39 |     return INTRIN( lhs, rhs ); \
 40 | }
 41 | DEF_ADD( int8_t,  _mm_add_epi8 )
 42 | DEF_ADD( int16_t, _mm_add_epi16 )
 43 | DEF_ADD( int32_t, _mm_add_epi32 )
 44 | DEF_ADD( int64_t, _mm_add_epi64 )
 45 | DEF_ADD( float,   _mm_add_ps )
 46 | DEF_ADD( double,  _mm_add_pd )
 47 | #undef DEF_ADD
 48 | 
 49 | // Sub: explicit specializations of sub< TYPE_T, sse_tag >, one per supported element type.
 50 | // ---------------------------------------------------------------------------------------
 51 | #define DEF_SUB( TYPE_T, CMD ) \
 52 | template<> inline simd_type< TYPE_T, sse_tag > \
 53 | sub< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \
 54 |                         simd_type< TYPE_T, sse_tag > rhs ) { \
 55 |     return CMD( lhs, rhs ); }
 56 | // Each DEF_SUB line expands to one specialization that returns the intrinsic CMD applied to lhs and rhs (lhs - rhs).
 57 | DEF_SUB( int8_t,  _mm_sub_epi8 )
 58 | DEF_SUB( int16_t, _mm_sub_epi16 )
 59 | DEF_SUB( int32_t, _mm_sub_epi32 )
 60 | DEF_SUB( int64_t, _mm_sub_epi64 )
 61 | DEF_SUB( float,   _mm_sub_ps )
 62 | DEF_SUB( double,  _mm_sub_pd )
 63 | #undef DEF_SUB // the helper macro is only needed for the list above
 64 | 
 65 | // MulLo: specializations of mullo< TYPE_T, sse_tag >; only int16_t and int32_t are provided here.
 66 | // ---------------------------------------------------------------------------------------
 67 | #define DEF_MULLO( TYPE_T, CMD ) \
 68 | template<> inline simd_type< TYPE_T, sse_tag > \
 69 | mullo< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \
 70 |                           simd_type< TYPE_T, sse_tag > rhs ) { \
 71 |     return CMD( lhs, rhs ); }
 72 | // NOTE(review): _mm_mullo_epi32 is an SSE4.1 intrinsic - confirm LITESIMD_HAS_SSE guarantees SSE4.1.
 73 | DEF_MULLO( int16_t, _mm_mullo_epi16 )
 74 | DEF_MULLO( int32_t, _mm_mullo_epi32 )
 75 | #undef DEF_MULLO // the helper macro is only needed for the list above
 76 | 
 77 | // MulHi: specialization of mulhi< TYPE_T, sse_tag >; only int16_t is provided here.
 78 | // ---------------------------------------------------------------------------------------
 79 | #define DEF_MULHI( TYPE_T, CMD ) \
 80 | template<> inline simd_type< TYPE_T, sse_tag > \
 81 | mulhi< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \
 82 |                           simd_type< TYPE_T, sse_tag > rhs ) { \
 83 |     return CMD( lhs, rhs ); }
 84 | // The single expansion below forwards lhs and rhs to _mm_mulhi_epi16 and returns its result.
 85 | DEF_MULHI( int16_t, _mm_mulhi_epi16 )
 86 | #undef DEF_MULHI // the helper macro is only needed for the line above
 87 | 
 88 | // Div: specializations of div< TYPE_T, sse_tag >; only the floating-point types are provided here.
 89 | // ---------------------------------------------------------------------------------------
 90 | #define DEF_DIV( TYPE_T, CMD ) \
 91 | template<> inline simd_type< TYPE_T, sse_tag > \
 92 | div< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \
 93 |                         simd_type< TYPE_T, sse_tag > rhs ) { \
 94 |     return CMD( lhs, rhs ); }
 95 | // Each DEF_DIV line expands to one specialization that returns the intrinsic CMD applied to lhs and rhs (lhs / rhs).
 96 | DEF_DIV( float,   _mm_div_ps )
 97 | DEF_DIV( double,  _mm_div_pd )
 98 | #undef DEF_DIV // the helper macro is only needed for the list above
 99 | 
100 | } // namespace litesimd
101 | 
102 | #endif // LITESIMD_HAS_SSE
103 | #endif // LITESIMD_SSE_ARITHMETIC_H
104 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/avx/arithmetic.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_AVX_ARITHMETIC_H
 24 | #define LITESIMD_AVX_ARITHMETIC_H
 25 | 
 26 | #ifdef LITESIMD_HAS_AVX
 27 | 
 28 | #include <litesimd/types.h>
 29 | #include <litesimd/detail/arch/common/arithmetic.h>
 30 | 
 31 | namespace litesimd {
 32 | 
 33 | // Add: explicit specializations of add< TYPE_T, avx_tag >, one per supported element type.
 34 | // ---------------------------------------------------------------------------------------
 35 | #define DEF_ADD( TYPE_T, CMD ) \
 36 | template<> inline simd_type< TYPE_T, avx_tag > \
 37 | add< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \
 38 |                         simd_type< TYPE_T, avx_tag > rhs ) { \
 39 |     return CMD( lhs, rhs ); }
 40 | // NOTE(review): the integer _mm256_add_epi* intrinsics are AVX2 - confirm LITESIMD_HAS_AVX implies AVX2.
 41 | DEF_ADD( int8_t,  _mm256_add_epi8 )
 42 | DEF_ADD( int16_t, _mm256_add_epi16 )
 43 | DEF_ADD( int32_t, _mm256_add_epi32 )
 44 | DEF_ADD( int64_t, _mm256_add_epi64 )
 45 | DEF_ADD( float,   _mm256_add_ps )
 46 | DEF_ADD( double,  _mm256_add_pd )
 47 | #undef DEF_ADD // the helper macro is only needed for the list above
 48 | 
 49 | // Sub: explicit specializations of sub< TYPE_T, avx_tag >, one per supported element type.
 50 | // ---------------------------------------------------------------------------------------
 51 | #define DEF_SUB( TYPE_T, CMD ) \
 52 | template<> inline simd_type< TYPE_T, avx_tag > \
 53 | sub< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \
 54 |                         simd_type< TYPE_T, avx_tag > rhs ) { \
 55 |     return CMD( lhs, rhs ); }
 56 | // NOTE(review): the integer _mm256_sub_epi* intrinsics are AVX2 - confirm LITESIMD_HAS_AVX implies AVX2.
 57 | DEF_SUB( int8_t,  _mm256_sub_epi8 )
 58 | DEF_SUB( int16_t, _mm256_sub_epi16 )
 59 | DEF_SUB( int32_t, _mm256_sub_epi32 )
 60 | DEF_SUB( int64_t, _mm256_sub_epi64 )
 61 | DEF_SUB( float,   _mm256_sub_ps )
 62 | DEF_SUB( double,  _mm256_sub_pd )
 63 | #undef DEF_SUB // the helper macro is only needed for the list above
 64 | 
 65 | // MulLo: specializations of mullo< TYPE_T, avx_tag >; only int16_t and int32_t are provided here.
 66 | // ---------------------------------------------------------------------------------------
 67 | #define DEF_MULLO( TYPE_T, CMD ) \
 68 | template<> inline simd_type< TYPE_T, avx_tag > \
 69 | mullo< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \
 70 |                           simd_type< TYPE_T, avx_tag > rhs ) { \
 71 |     return CMD( lhs, rhs ); }
 72 | // NOTE(review): _mm256_mullo_epi16/_epi32 are AVX2 intrinsics - confirm LITESIMD_HAS_AVX implies AVX2.
 73 | DEF_MULLO( int16_t, _mm256_mullo_epi16 )
 74 | DEF_MULLO( int32_t, _mm256_mullo_epi32 )
 75 | #undef DEF_MULLO // the helper macro is only needed for the list above
 76 | 
 77 | // MulHi: specialization of mulhi< TYPE_T, avx_tag >; only int16_t is provided here.
 78 | // ---------------------------------------------------------------------------------------
 79 | #define DEF_MULHI( TYPE_T, CMD ) \
 80 | template<> inline simd_type< TYPE_T, avx_tag > \
 81 | mulhi< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \
 82 |                           simd_type< TYPE_T, avx_tag > rhs ) { \
 83 |     return CMD( lhs, rhs ); }
 84 | // NOTE(review): _mm256_mulhi_epi16 is an AVX2 intrinsic - confirm LITESIMD_HAS_AVX implies AVX2.
 85 | DEF_MULHI( int16_t, _mm256_mulhi_epi16 )
 86 | #undef DEF_MULHI // the helper macro is only needed for the line above
 87 | 
 88 | // Div: specializations of div< TYPE_T, avx_tag >; only the floating-point types are provided here.
 89 | // ---------------------------------------------------------------------------------------
 90 | #define DEF_DIV( TYPE_T, CMD ) \
 91 | template<> inline simd_type< TYPE_T, avx_tag > \
 92 | div< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \
 93 |                         simd_type< TYPE_T, avx_tag > rhs ) { \
 94 |     return CMD( lhs, rhs ); }
 95 | // Each DEF_DIV line expands to one specialization that returns the intrinsic CMD applied to lhs and rhs (lhs / rhs).
 96 | DEF_DIV( float,   _mm256_div_ps )
 97 | DEF_DIV( double,  _mm256_div_pd )
 98 | #undef DEF_DIV // the helper macro is only needed for the list above
 99 | 
100 | } // namespace litesimd
101 | 
102 | #endif // LITESIMD_HAS_AVX
103 | #endif // LITESIMD_AVX_ARITHMETIC_H
104 | 


--------------------------------------------------------------------------------
/samples/greater/greater.cpp:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | // Compiled with
 24 | //
 25 | // g++ -std=c++14 -O3 -mavx2 -I<path/to/litesimd/include> greater.cpp -o greater
 26 | 
 27 | #include <iostream>
 28 | #include <litesimd/compare.h>
 29 | #include <litesimd/helpers/iostream.h>
 30 | // Sample: compares increasing values of 'val' against a sorted SIMD register and prints where each value falls.
 31 | int main()
 32 | {
 33 |     namespace ls = litesimd;
 34 | 
 35 |     // int32_t vector in default instruction set (SSE/AVX)
 36 |     ls::t_int32_simd cmp;
 37 | 
 38 |     // litesimd types are compatible with the raw intrinsic types
 39 | #ifdef LITESIMD_HAS_AVX
 40 |     cmp = _mm256_set_epi32( 80, 70, 60, 50, 40, 30, 20, 10 );
 41 | #else
 42 |     // No AVX, using SSE
 43 |     cmp = _mm_set_epi32( 40, 30, 20, 10 );
 44 | #endif // LITESIMD_HAS_AVX
 45 | 
 46 |     int32_t val = 5;
 47 | 
 48 |     // t_int32_simd::simd_size is how many int32_t fit in a t_int32_simd (4 - SSE, 8 - AVX)
 49 |     for( size_t i = 0; i <= ls::t_int32_simd::simd_size; ++i ) // '<=' is deliberate: simd_size + 1 iterations, one per possible position
 50 |     {
 51 |         // Compare 'val' against all 'cmp' values
 52 |         uint32_t mask = ls::greater_bitmask( val, cmp );
 53 | 
 54 |         // As 'cmp' is sorted, we can use the bitmask to find the
 55 |         // last item which 'val' is greater than
 56 |         //
 57 |         // Returns values in the range [-1, ls::t_int32_simd::simd_size)
 58 |         int index = ls::bitmask_last_index< int32_t >( mask );
 59 | 
 60 |         // greater_last_index could be called instead of
 61 |         // greater_bitmask + bitmask_last_index
 62 |         //
 63 |         // int index = ls::greater_last_index( val, cmp );
 64 | 
 65 |         if( index < 0 )
 66 |         {
 67 |             std::cout << "The value " << val
 68 |                       << " is less than all values of " << cmp
 69 |                       << std::endl;
 70 |         }
 71 |         else if( index == ls::t_int32_simd::simd_size-1 )
 72 |         {
 73 |             std::cout << "The value " << val
 74 |                       << " is greater than all values of " << cmp
 75 |                       << std::endl;
 76 |         }
 77 |         else
 78 |         {
 79 |             std::cout << "The value " << val
 80 |                       << " is between items " << index
 81 |                       << " and " << index + 1
 82 |                       << " of " << cmp
 83 |                       << std::endl;
 84 |         }
 85 | 
 86 |         val += 10; // step past the next element of 'cmp' (its values are spaced 10 apart)
 87 |     }
 88 |     return 0;
 89 | }
 90 | 
 91 | // The output on AVX will be
 92 | //
 93 | // $ ./greater
 94 | // The value 5 is less than all values of (80, 70, 60, 50, 40, 30, 20, 10)
 95 | // The value 15 is between items 0 and 1 of (80, 70, 60, 50, 40, 30, 20, 10)
 96 | // The value 25 is between items 1 and 2 of (80, 70, 60, 50, 40, 30, 20, 10)
 97 | // The value 35 is between items 2 and 3 of (80, 70, 60, 50, 40, 30, 20, 10)
 98 | // The value 45 is between items 3 and 4 of (80, 70, 60, 50, 40, 30, 20, 10)
 99 | // The value 55 is between items 4 and 5 of (80, 70, 60, 50, 40, 30, 20, 10)
100 | // The value 65 is between items 5 and 6 of (80, 70, 60, 50, 40, 30, 20, 10)
101 | // The value 75 is between items 6 and 7 of (80, 70, 60, 50, 40, 30, 20, 10)
102 | // The value 85 is greater than all values of (80, 70, 60, 50, 40, 30, 20, 10)
103 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/sse/intravector.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_ARCH_SSE_INTRAVECTOR_H
 24 | #define LITESIMD_ARCH_SSE_INTRAVECTOR_H
 25 | 
 26 | #ifdef LITESIMD_HAS_SSE
 27 | 
 28 | #include <litesimd/detail/arch/common/intravector.h>
 29 | 
 30 | namespace litesimd {
 31 | // int8_t / SSE: folds all 16 byte lanes into lane 0 by applying func to successively narrower halves, then returns lane 0.
 32 | template<>
 33 | struct intravector_op< int8_t, sse_tag >
 34 | {
 35 |     template< typename Function_T > // func takes two registers and returns one; assumed element-wise and order-independent (e.g. min/max) - TODO confirm
 36 |     int8_t inline operator()( simd_type< int8_t, sse_tag > vec, Function_T func )
 37 |     {
 38 |         vec = func( vec, _mm_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); // combine with the upper 64 bits moved into the lower 64 bits
 39 |         vec = func( vec, _mm_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); // combine with 32-bit element 1 moved into element 0
 40 |         vec = func( vec, _mm_shufflelo_epi16( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); // combine with 16-bit element 1 moved into element 0
 41 |         vec = func( vec, _mm_srli_epi16( vec, 8 ) ); // combine with byte 1 shifted down into byte 0
 42 |         return (int8_t)_mm_cvtsi128_si32( vec ); // read the low 32 bits and keep only the lowest byte
 43 |     }
 44 | };
 45 | // int16_t / SSE: folds all 8 16-bit lanes into lane 0 in three func applications, then returns lane 0.
 46 | template<>
 47 | struct intravector_op< int16_t, sse_tag >
 48 | {
 49 |     template< typename Function_T > // func takes two registers and returns one; assumed element-wise and order-independent - TODO confirm
 50 |     int16_t inline operator()( simd_type< int16_t, sse_tag > vec, Function_T func )
 51 |     {
 52 |         vec = func( vec, _mm_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); // combine with the upper 64 bits moved into the lower 64 bits
 53 |         vec = func( vec, _mm_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); // combine with 32-bit element 1 moved into element 0
 54 |         vec = func( vec, _mm_shufflelo_epi16( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); // combine with 16-bit element 1 moved into element 0
 55 |         return (int16_t)_mm_cvtsi128_si32( vec ); // read the low 32 bits and keep only the lowest 16 bits
 56 |     }
 57 | };
 58 | // int32_t / SSE: folds all 4 32-bit lanes into lane 0 in two func applications, then returns lane 0.
 59 | template<>
 60 | struct intravector_op< int32_t, sse_tag >
 61 | {
 62 |     template< typename Function_T > // func takes two registers and returns one; assumed element-wise and order-independent - TODO confirm
 63 |     int32_t inline operator()( simd_type< int32_t, sse_tag > vec, Function_T func )
 64 |     {
 65 |         vec = func( vec, _mm_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); // combine with the upper 64 bits moved into the lower 64 bits
 66 |         vec = func( vec, _mm_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); // combine with 32-bit element 1 moved into element 0
 67 |         return _mm_cvtsi128_si32( vec ); // lowest 32-bit element
 68 |     }
 69 | };
 70 | // int64_t / SSE: folds the 2 64-bit lanes into lane 0 with a single func application, then returns lane 0.
 71 | template<>
 72 | struct intravector_op< int64_t, sse_tag >
 73 | {
 74 |     template< typename Function_T > // func takes two registers and returns one; assumed element-wise and order-independent - TODO confirm
 75 |     int64_t inline operator()( simd_type< int64_t, sse_tag > vec, Function_T func )
 76 |     {
 77 |         vec = func( vec, _mm_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); // combine with the upper 64 bits moved into the lower 64 bits
 78 |         return _mm_cvtsi128_si64( vec ); // NOTE(review): _mm_cvtsi128_si64 exists only on x86-64 targets - confirm 32-bit builds are out of scope
 79 |     }
 80 | };
 81 | // float / SSE: folds all 4 float lanes into lane 0 in two func applications, then returns lane 0.
 82 | template<>
 83 | struct intravector_op< float, sse_tag >
 84 | {
 85 |     template< typename Function_T > // func takes two registers and returns one; assumed element-wise and order-independent - TODO confirm
 86 |     float inline operator()( simd_type< float, sse_tag > vec, Function_T func )
 87 |     {
 88 |         vec = func( vec, _mm_shuffle_ps( vec, vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); // combine with elements 2 and 3 moved into elements 0 and 1
 89 |         vec = func( vec, _mm_shuffle_ps( vec, vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); // combine with element 1 moved into element 0
 90 |         return _mm_cvtss_f32( vec ); // lowest float element
 91 |     }
 92 | };
 93 | // double / SSE: folds the 2 double lanes into lane 0 with a single func application, then returns lane 0.
 94 | template<>
 95 | struct intravector_op< double, sse_tag >
 96 | {
 97 |     template< typename Function_T > // func takes two registers and returns one; assumed element-wise and order-independent - TODO confirm
 98 |     double inline operator()( simd_type< double, sse_tag > vec, Function_T func )
 99 |     {
100 |         vec = func( vec, _mm_shuffle_pd( vec, vec, _MM_SHUFFLE2( 0, 1 ) ) ); // combine with a copy whose two elements are swapped
101 |         return _mm_cvtsd_f64( vec ); // lowest double element
102 |     }
103 | };
104 | 
105 | } // namespace litesimd
106 | 
107 | #endif // LITESIMD_HAS_SSE
108 | #endif // LITESIMD_ARCH_SSE_INTRAVECTOR_H
109 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/common/shuffle.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_ARCH_COMMON_SHUFFLE_H
 24 | #define LITESIMD_ARCH_COMMON_SHUFFLE_H
 25 | 
 26 | #include <litesimd/types.h>
 27 | 
 28 | namespace litesimd {
 29 | 
 30 | // High/Low insert: primary templates only; their bodies are empty, so any real behavior must come from specializations.
 31 | // ---------------------------------------------------------------------------------------
 32 | template< typename ValueType_T, typename Tag_T = default_tag > // Tag_T falls back to default_tag when not given
 33 | inline simd_type< ValueType_T, Tag_T >
 34 | high_insert( simd_type< ValueType_T, Tag_T >, ValueType_T = 0 ){} // NOTE(review): non-void function with an empty body - calling an unspecialized instance is undefined behavior
 35 | // low_insert: primary template with an empty body; the inserted value defaults to 0. Presumably specialized per architecture - TODO confirm.
 36 | template< typename ValueType_T, typename Tag_T = default_tag > // Tag_T falls back to default_tag when not given
 37 | inline simd_type< ValueType_T, Tag_T >
 38 | low_insert( simd_type< ValueType_T, Tag_T >, ValueType_T = 0 ){} // NOTE(review): non-void function with an empty body - calling an unspecialized instance is undefined behavior
 39 | 
 40 | /**
 41 |  * \ingroup shuffle
 42 |  * \brief Combine two SIMD registers using a mask to choose the values.
 43 |  *
 44 |  * The **blend** operation combines 2 SIMD registers using a mask, such as the
 45 |  * result of a compare function, to select each value.
 46 |  *
 47 |  * Example of blend function on SSE and int32_t
 48 |  *
 49 |  * | Index | 3 | 2 | 1 | 0 |
 50 |  * | :--- | :--: | :--: | :--: | :--: |
 51 |  * | litesimd::t_int32_simd **mask** | 0x00000000 | 0xFFFFFFFF | 0xFFFFFFFF | 0x00000000 |
 52 |  * | litesimd::t_int32_simd X( 1 ); | 1 | 1 | 1 | 1 |
 53 |  * | litesimd::t_int32_simd Y( 2 ); | 2 | 2 | 2 | 2 |
 54 |  * | litesimd::blend( mask, X, Y ); | 2 | 1 | 1 | 2 |
 55 |  *
 56 |  * \param mask Mask to select the values
 57 |  * \param trueVal Values selected where the mask is set
 58 |  * \param falseVal Values selected where the mask is clear
 59 |  * \tparam ValueType_T Base type of original SIMD register
 60 |  * \returns Combined SIMD register
 61 |  *
 62 |  * **Example**
 63 |  * ```{.cpp}
 64 |  * #include <iostream>
 65 |  * #include <litesimd/types.h>
 66 |  * #include <litesimd/shuffle.h>
 67 |  * #include <litesimd/helpers/iostream.h>
 68 |  *
 69 |  * int main()
 70 |  * {
 71 |  *     namespace ls = litesimd;
 72 |  *
 73 |  *     ls::t_int32_simd mask( 0x00000000, 0xffffffff, 0xffffffff, 0x00000000 );
 74 |  *     ls::t_int32_simd val1( 1 );
 75 |  *     ls::t_int32_simd val2( 2 );
 76 |  *     std::cout << "blend( mask, val1, val2 ): "
 77 |  *               << ls::blend( mask, val1, val2 ) << std::endl;
 78 |  *     return 0;
 79 |  * }
 80 |  * ```
 81 |  * Output on a SSE compilation
 82 |  * ```
 83 |  * blend( mask, val1, val2 ): (2, 1, 1, 2)
 84 |  * ```
 85 |  */
 86 | template< typename ValueType_T, typename Tag_T = default_tag > // Tag_T falls back to default_tag when not given
 87 | inline simd_type< ValueType_T, Tag_T >
 88 | blend( simd_type< ValueType_T, Tag_T > mask,
 89 |        simd_type< ValueType_T, Tag_T > trueVal,
 90 |        simd_type< ValueType_T, Tag_T > falseVal ){} // NOTE(review): primary template has an empty body - an unspecialized call is undefined behavior
 91 | 
 92 | // Get / Set: primary functor templates parameterized on a compile-time lane index; bodies are empty placeholders.
 93 | // ---------------------------------------------------------------------------------------
 94 | template< int index, typename ValueType_T, typename Tag_T = default_tag > // 'index' is unused here; presumably selects the lane in specializations - TODO confirm
 95 | struct get_functor
 96 | {
 97 |     inline ValueType_T operator()( simd_type< ValueType_T, Tag_T > ){} // NOTE(review): non-void with an empty body - calling it unspecialized is undefined behavior
 98 | };
 99 | // set_functor: counterpart of get_functor taking a register and a scalar and returning a register; primary template is an empty placeholder.
100 | template< int index, typename ValueType_T, typename Tag_T = default_tag > // 'index' is unused here; presumably selects the lane in specializations - TODO confirm
101 | struct set_functor
102 | {
103 |     inline simd_type< ValueType_T, Tag_T > operator()( simd_type< ValueType_T, Tag_T >, ValueType_T ){} // NOTE(review): non-void with an empty body - calling it unspecialized is undefined behavior
104 | };
105 | 
106 | } // namespace litesimd
107 | 
108 | #endif // LITESIMD_ARCH_COMMON_SHUFFLE_H
109 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/common/algorithm.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_ARCH_COMMON_ALGORITHM_H
 24 | #define LITESIMD_ARCH_COMMON_ALGORITHM_H
 25 | 
 26 | #include <litesimd/types.h>
 27 | 
 28 | namespace litesimd {
 29 | 
 30 | // Min max (two-register, element-wise forms)
 31 | // ---------------------------------------------------------------------------------------
 32 | /**
 33 |  * \ingroup algorithm
 34 |  * \brief Compares each value inside the SIMD register and returns the lesser of each pair.
 35 |  *
 36 |  * Each value inside the packed SIMD register is compared independently and the result is
 37 |  * the lesser of the two values on the same index inside the packed SIMD register.
 38 |  *
 39 |  * | Index | 3 | 2 | 1 | 0 |
 40 |  * | :--- | :--: | :--: | :--: | :--: |
 41 |  * | Register X | a | b | c | d |
 42 |  * | Register Y | e | f | g | h |
 43 |  * | litesimd::min( X, Y ) | min( a, e ) | min( b, f ) | min( c, g ) | min( d, h ) |
 44 |  *
 45 |  * \param lhs,rhs SIMD registers to compare
 46 |  * \returns SIMD register with the lesser of each packed value
 47 |  *
 48 |  * **Example**
 49 |  * ```{.cpp}
 50 |  * #include <iostream>
 51 |  * #include <litesimd/types.h>
 52 |  * #include <litesimd/algorithm.h>
 53 |  * #include <litesimd/helpers/iostream.h>
 54 |  *
 55 |  * int main()
 56 |  * {
 57 |  *     namespace ls = litesimd;
 58 |  *     ls::t_int32_simd a( 4, 1, 3, 2 ), b( 2, 2, 1, 4 );
 59 |  *     std::cout << "min( a, b ): " << ls::min( a, b ) << std::endl;
 60 |  *     return 0;
 61 |  * }
 62 |  * ```
 63 |  * Output on a SSE compilation
 64 |  * ```
 65 |  * min( a, b ): ( 2, 1, 1, 2 )
 66 |  * ```
 67 |  *
 68 |  * \see max
 69 |  */
 70 | template< typename ValueType_T, typename Tag_T > inline simd_type< ValueType_T, Tag_T > // note: Tag_T has no default in this primary template
 71 | min( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){} // NOTE(review): empty body in a non-void function - an unspecialized call is undefined behavior
 72 | 
 73 | /**
 74 |  * \ingroup algorithm
 75 |  * \brief Compares each value inside the SIMD register and returns the greater of each pair.
 76 |  *
 77 |  * Each value inside the packed SIMD register is compared independently and the result is
 78 |  * the greater of the two values on the same index inside the packed SIMD register.
 79 |  *
 80 |  * | Index | 3 | 2 | 1 | 0 |
 81 |  * | :--- | :--: | :--: | :--: | :--: |
 82 |  * | Register X | a | b | c | d |
 83 |  * | Register Y | e | f | g | h |
 84 |  * | litesimd::max( X, Y ) | max( a, e ) | max( b, f ) | max( c, g ) | max( d, h ) |
 85 |  *
 86 |  * \param lhs,rhs SIMD registers to compare
 87 |  * \returns SIMD register with the greater of each packed value
 88 |  *
 89 |  * **Example**
 90 |  * ```{.cpp}
 91 |  * #include <iostream>
 92 |  * #include <litesimd/types.h>
 93 |  * #include <litesimd/algorithm.h>
 94 |  * #include <litesimd/helpers/iostream.h>
 95 |  *
 96 |  * int main()
 97 |  * {
 98 |  *     namespace ls = litesimd;
 99 |  *     ls::t_int32_simd a( 4, 1, 3, 2 ), b( 2, 2, 1, 4 );
100 |  *     std::cout << "max( a, b ): " << ls::max( a, b ) << std::endl;
101 |  *     return 0;
102 |  * }
103 |  * ```
104 |  * Output on a SSE compilation
105 |  * ```
106 |  * max( a, b ): ( 4, 2, 3, 4 )
107 |  * ```
108 |  *
109 |  * \see min
110 |  */
111 | template< typename ValueType_T, typename Tag_T > inline simd_type< ValueType_T, Tag_T > // note: Tag_T has no default in this primary template
112 | max( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){} // NOTE(review): empty body in a non-void function - an unspecialized call is undefined behavior
113 | 
114 | } // namespace litesimd
115 | 
116 | #endif // LITESIMD_ARCH_COMMON_ALGORITHM_H
117 | 


--------------------------------------------------------------------------------
/include/litesimd/algorithm/minmax.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_ALGORITHM_MINMAX_H
 24 | #define LITESIMD_ALGORITHM_MINMAX_H
 25 | 
 26 | #include <litesimd/detail/arch/sse/algorithm.h>
 27 | #include <litesimd/detail/arch/avx/algorithm.h>
 28 | #include <litesimd/intravector.h>
 29 | 
 30 | namespace litesimd {
 31 | 
 32 | /**
 33 |  * \ingroup algorithm
 34 |  * \brief Returns the smallest of the values stored in the SIMD register.
 35 |  *
 36 |  * \param vec SIMD register to compare
 37 |  * \return The smallest of the values in the SIMD register
 38 |  *
 39 |  * **Example**
 40 |  * ```{.cpp}
 41 |  * #include <iostream>
 42 |  * #include <litesimd/types.h>
 43 |  * #include <litesimd/algorithm.h>
 44 |  *
 45 |  * int main()
 46 |  * {
 47 |  *     namespace ls = litesimd;
 48 |  *     std::cout << "min( zero ) == " << ls::min( ls::t_int32_simd::zero() ) << std::endl;
 49 |  *     std::cout << "min( iota ) == " << ls::min( ls::iota< int32_t >( 5 ) ) << std::endl;
 50 |  *     std::cout << "min( 4,3,2,1 ) == " << ls::min( ls::simd_type< int32_t, ls::sse_tag >( 4, 3, 2, 1 ) ) << std::endl;
 51 |  *     return 0;
 52 |  * }
 53 |  * ```
 54 |  * Output on a SSE compilation
 55 |  * ```
 56 |  * min( zero ) == 0
 57 |  * min( iota ) == 5
 58 |  * min( 4,3,2,1 ) == 1
 59 |  * ```
 60 |  *
 61 |  * \see max
 62 |  */
 63 | template< typename SimdType_T, typename SimdType_T::simd_value_type* = nullptr > // unnamed pointer parameter: substitution succeeds only if SimdType_T has a nested simd_value_type
 64 | inline typename SimdType_T::simd_value_type min( SimdType_T vec )
 65 | {
 66 |     using type = typename SimdType_T::simd_value_type;
 67 |     using tag = typename SimdType_T::simd_tag;
 68 |     struct op{ inline SimdType_T operator()( SimdType_T lhs, SimdType_T rhs )
 69 |             { return min< type, tag >( lhs, rhs ); } }; // local functor forwarding to the two-register min for this type/tag
 70 |     return intravector_op< type, tag >()( vec, op() ); // reduce the whole register to one scalar using op
 71 | }
 72 | 
 73 | /**
 74 |  * \ingroup algorithm
 75 |  * \brief Returns the largest of the values stored in the SIMD register.
 76 |  *
 77 |  * \param vec SIMD register to compare
 78 |  * \return The largest of the values in the SIMD register
 79 |  *
 80 |  * **Example**
 81 |  * ```{.cpp}
 82 |  * // max example
 83 |  * #include <iostream>
 84 |  * #include <litesimd/types.h>
 85 |  * #include <litesimd/algorithm.h>
 86 |  *
 87 |  * int main()
 88 |  * {
 89 |  *     namespace ls = litesimd;
 90 |  *     std::cout << "max( zero ) == " << ls::max( ls::t_int32_simd::zero() ) << std::endl;
 91 |  *     std::cout << "max( iota ) == " << ls::max( ls::iota< int32_t, ls::sse_tag >( 5 ) ) << std::endl;
 92 |  *     std::cout << "max( 4,3,2,1 ) == " << ls::max( ls::simd_type< int32_t, ls::sse_tag >( 4, 3, 2, 1 ) ) << std::endl;
 93 |  *     return 0;
 94 |  * }
 95 |  * ```
 96 |  * Output on a SSE compilation
 97 |  * ```
 98 |  * max( zero ) == 0
 99 |  * max( iota ) == 8
100 |  * max( 4,3,2,1 ) == 4
101 |  * ```
102 |  *
103 |  * \see min
104 |  */
105 | template< typename SimdType_T, typename SimdType_T::simd_value_type* = nullptr > // unnamed pointer parameter: substitution succeeds only if SimdType_T has a nested simd_value_type
106 | inline typename SimdType_T::simd_value_type max( SimdType_T vec )
107 | {
108 |     using type = typename SimdType_T::simd_value_type;
109 |     using tag = typename SimdType_T::simd_tag;
110 |     struct op{ inline SimdType_T operator()( SimdType_T lhs, SimdType_T rhs )
111 |             { return max< type, tag >( lhs, rhs ); } }; // local functor forwarding to the two-register max for this type/tag
112 |     return intravector_op< type, tag >()( vec, op() ); // reduce the whole register to one scalar using op
113 | }
114 | 
115 | } // namespace litesimd
116 | 
117 | #endif // LITESIMD_ALGORITHM_MINMAX_H
118 | 


--------------------------------------------------------------------------------
/test/shuffle.cpp:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #include <litesimd/types.h>
 24 | #include <litesimd/shuffle.h>
 25 | #include <litesimd/algorithm.h>
 26 | #include <litesimd/helpers/iostream.h>
 27 | #include "gtest/gtest.h"
 28 | 
 29 | namespace ls = litesimd;
 30 | 
 31 | template <typename T> class ShuffleTypedTest: public ::testing::Test {}; // Empty fixture; T is one std::pair< value type, tag > taken from TestTypes.
 32 | // Type list driving the typed tests below: .first_type is the element type, .second_type the instruction-set tag.
 33 | using TestTypes = ::testing::Types<
 34 | #ifdef __SSE2__ // SSE entries exist only when the compiler defines __SSE2__. NOTE(review): test/intravector.cpp gates on LITESIMD_HAS_SSE instead - confirm which macro is intended.
 35 |     std::pair<int8_t, ls::sse_tag>, std::pair<int16_t, ls::sse_tag>,
 36 |     std::pair<int32_t, ls::sse_tag>, std::pair<int64_t, ls::sse_tag>,
 37 |     std::pair<float, ls::sse_tag>, std::pair<double, ls::sse_tag>
 38 | #ifdef __AVX2__ // AVX entries are appended only when __AVX2__ is defined as well (hence the leading comma on the next line).
 39 |     , std::pair<int8_t, ls::avx_tag>, std::pair<int16_t, ls::avx_tag>,
 40 |     std::pair<int32_t, ls::avx_tag>, std::pair<int64_t, ls::avx_tag>,
 41 |     std::pair<float, ls::avx_tag>, std::pair<double, ls::avx_tag>
 42 | #endif // __AVX2__
 43 | #endif // __SSE2__ (when it is undefined the type list is empty)
 44 | >;
 45 | TYPED_TEST_CASE(ShuffleTypedTest, TestTypes); // Binds the fixture to the type list above.
 46 | 
 47 | #ifdef __SSE2__
 48 | TEST(BaseTest, Set1FloatTest)
 49 | {
 50 |     // _mm_set_ps / _mm_set_pd list the highest element first, so the last argument is element 0.
 51 |     __m128 sse_floats = _mm_set_ps( 4.0f, 3.0f, 2.0f, 1.0f );
 52 |     __m128d sse_doubles = _mm_set_pd( 2.0, 1.0 );
 53 |     EXPECT_FLOAT_EQ( 1.0f, (ls::get<0, float, ls::sse_tag >( sse_floats )) );
 54 |     EXPECT_DOUBLE_EQ( 1.0, (ls::get<0, double, ls::sse_tag >( sse_doubles )) );
 55 | 
 56 | #ifdef __AVX2__
 57 |     // Same element-0 check on the 256-bit registers.
 58 |     __m256 avx_floats = _mm256_set_ps( 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f );
 59 |     __m256d avx_doubles = _mm256_set_pd( 4.0, 3.0, 2.0, 1.0 );
 60 |     EXPECT_FLOAT_EQ( 1.0f, (ls::get<0, float, ls::avx_tag >( avx_floats )) );
 61 |     EXPECT_DOUBLE_EQ( 1.0, (ls::get<0, double, ls::avx_tag >( avx_doubles )) );
 62 | 
 63 | #endif //__AVX2__
 64 | }
 65 | 
 66 | TYPED_TEST(ShuffleTypedTest, GetSetTest)
 67 | {
 68 |     using value_t = typename TypeParam::first_type;
 69 |     using tag_t = typename TypeParam::second_type;
 70 |     using vec_t = ls::simd_type< value_t, tag_t >;
 71 | 
 72 |     // A zeroed register must read back 0 at index 0.
 73 |     vec_t reg = vec_t::zero();
 74 |     EXPECT_EQ( static_cast<value_t>(0), (ls::get<0, value_t, tag_t >( reg )) );
 75 | 
 76 |     // A value written with set<0> must be visible through get<0>.
 77 |     reg = ls::set< 0, value_t, tag_t >( reg, static_cast<value_t>(1) );
 78 |     EXPECT_EQ( static_cast<value_t>(1), (ls::get<0, value_t, tag_t >( reg )) );
 79 | }
 80 | 
 81 | TYPED_TEST(ShuffleTypedTest, HighInsertTest)
 82 | {
 83 |     using value_t = typename TypeParam::first_type;
 84 |     using tag_t = typename TypeParam::second_type;
 85 |     using vec_t = ls::simd_type< value_t, tag_t >;
 86 | 
 87 |     // Inserting into a zeroed register must leave the new value in the highest element.
 88 |     vec_t reg = vec_t::zero();
 89 |     reg = ls::high_insert( reg, 1 );
 90 |     EXPECT_EQ( 1, (ls::get< vec_t::simd_size -1, value_t, tag_t> ( reg )) ) << "Simd: " << reg;
 91 | 
 92 |     // Inserting simd_size on top of iota( 0 ) must give index + 1 in every element.
 93 |     reg = ls::high_insert( ls::iota< value_t, tag_t >( 0 ), vec_t::simd_size );
 94 | 
 95 |     // simd_size is copied into a local first: on MacOSX clang 9, handing the constexpr member straight to
 96 |     // EXPECT_EQ made the macro reference an external simd_size symbol that is never defined (linker error).
 97 |     auto lane_count = vec_t::simd_size;
 98 |     EXPECT_EQ( lane_count, (ls::get< vec_t::simd_size -1, value_t, tag_t> ( reg )) ) << "Simd: " << reg;
 99 |     ls::for_each( reg, [&reg]( int index, value_t val )
100 |     {
101 |         EXPECT_EQ( static_cast<value_t>( index + 1 ), val ) << "Error on index " << index << ", Simd: " << reg;
102 |         return true;
103 |     } );
104 | }
105 | 
106 | TYPED_TEST(ShuffleTypedTest, LowInsertTest)
107 | {
108 |     using value_t = typename TypeParam::first_type;
109 |     using tag_t = typename TypeParam::second_type;
110 |     using vec_t = ls::simd_type< value_t, tag_t >;
111 |     // low_insert( iota( 1 ), 0 ) must put 0 in element 0 and leave every element equal to its index.
112 |     vec_t reg = ls::iota< value_t, tag_t >( 1 );
113 |     reg = ls::low_insert( reg, 0 );
114 |     EXPECT_EQ( 0, (ls::get< 0, value_t, tag_t> ( reg )) );
115 |     ls::for_each( reg, []( int index, value_t val )
116 |     {
117 |         EXPECT_EQ( static_cast<value_t>( index ), val ) << "Error on index " << index;
118 |         return true;
119 |     } );
120 | }
121 | #endif // __SSE2__
122 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/avx/intravector.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_ARCH_AVX_INTRAVECTOR_H
 24 | #define LITESIMD_ARCH_AVX_INTRAVECTOR_H
 25 | 
 26 | #ifdef LITESIMD_HAS_AVX
 27 | 
 28 | #include <litesimd/detail/arch/common/intravector.h>
 29 | #include <litesimd/detail/arch/avx/detail/compatibility.h>
 30 | 
 31 | namespace litesimd {
 32 | 
 33 | template<>
 34 | struct intravector_op< int8_t, avx_tag > // Folds all int8_t elements of one 256-bit register into a single value using a caller-supplied binary operation.
 35 | { // NOTE(review): elements are paired in shuffle order, so func presumably has to be associative and commutative - confirm.
 36 |     template< typename Function_T > // Function_T: callable as func( vec, shuffled ) whose result is assigned back to vec
 37 |     int8_t inline operator()( simd_type< int8_t, avx_tag > vec, Function_T func )
 38 |     {
 39 |         vec = func( vec, _mm256_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); // per 128-bit lane: pair dwords 0,1 with dwords 2,3
 40 |         vec = func( vec, _mm256_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); // per 128-bit lane: pair dword 0 with dword 1
 41 |         vec = func( vec, _mm256_permute4x64_epi64( vec, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); // pair qword 0 with qword 2, i.e. the low lane with the high lane
 42 |         vec = func( vec, _mm256_shufflelo_epi16( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); // pair word 0 with word 1
 43 |         vec = func( vec, _mm256_srli_epi16( vec, 8 ) ); // pair byte 0 with byte 1 (each word's high byte is shifted down)
 44 |         return (int8_t)_mm_cvtsi128_si32( _mm256_extracti128_si256( vec, 0 ) ); // only the lowest byte holds the full result; the cast drops the rest
 45 |     }
 46 | };
 47 | 
 48 | template<>
 49 | struct intravector_op< int16_t, avx_tag > // Folds all int16_t elements of one 256-bit register into a single value using a caller-supplied binary operation.
 50 | { // NOTE(review): elements are paired in shuffle order, so func presumably has to be associative and commutative - confirm.
 51 |     template< typename Function_T > // Function_T: callable as func( vec, shuffled ) whose result is assigned back to vec
 52 |     int16_t inline operator()( simd_type< int16_t, avx_tag > vec, Function_T func )
 53 |     {
 54 |         vec = func( vec, _mm256_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); // per 128-bit lane: pair dwords 0,1 with dwords 2,3
 55 |         vec = func( vec, _mm256_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); // per 128-bit lane: pair dword 0 with dword 1
 56 |         vec = func( vec, _mm256_permute4x64_epi64( vec, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); // pair qword 0 with qword 2, i.e. the low lane with the high lane
 57 |         vec = func( vec, _mm256_shufflelo_epi16( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); // pair word 0 with word 1
 58 |         return (int16_t)_mm_cvtsi128_si32( _mm256_extracti128_si256( vec, 0 ) ); // only the lowest word holds the full result; the cast drops the rest
 59 |     }
 60 | };
 61 | 
 62 | template<>
 63 | struct intravector_op< int32_t, avx_tag > // Folds all int32_t elements of one 256-bit register into a single value using a caller-supplied binary operation.
 64 | { // NOTE(review): elements are paired in shuffle order, so func presumably has to be associative and commutative - confirm.
 65 |     template< typename Function_T > // Function_T: callable as func( vec, shuffled ) whose result is assigned back to vec
 66 |     int32_t inline operator()( simd_type< int32_t, avx_tag > vec, Function_T func )
 67 |     {
 68 |         vec = func( vec, _mm256_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); // per 128-bit lane: pair dwords 0,1 with dwords 2,3
 69 |         vec = func( vec, _mm256_shuffle_epi32( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); // per 128-bit lane: pair dword 0 with dword 1
 70 |         vec = func( vec, _mm256_permute4x64_epi64( vec, _MM_SHUFFLE( 0, 0, 0, 2 ) ) ); // pair qword 0 with qword 2, i.e. the low lane with the high lane
 71 |         return _mm_cvtsi128_si32( _mm256_extracti128_si256( vec, 0 ) ); // dword 0 of the low lane now holds the full result
 72 |     }
 73 | };
 74 | 
 75 | template<>
 76 | struct intravector_op< int64_t, avx_tag > // Folds the four int64_t elements of one 256-bit register into a single value using a caller-supplied binary operation.
 77 | { // NOTE(review): elements are paired in shuffle order, so func presumably has to be associative and commutative - confirm.
 78 |     template< typename Function_T > // Function_T: callable as func( vec, shuffled ) whose result is assigned back to vec
 79 |     int64_t inline operator()( simd_type< int64_t, avx_tag > vec, Function_T func )
 80 |     {
 81 |         vec = func( vec, _mm256_permute4x64_epi64( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); // pair qwords 0,1 with qwords 2,3
 82 |         vec = func( vec, _mm256_permute4x64_epi64( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); // pair qword 0 with qword 1
 83 |         return _mm256_extract_epi64( vec, 0 ); // qword 0 now holds the full result
 84 |     }
 85 | };
 86 | 
 87 | template<>
 88 | struct intravector_op< float, avx_tag > // Folds the eight float elements of one 256-bit register into a single value using a caller-supplied binary operation.
 89 | { // NOTE(review): elements are paired in shuffle order, so func presumably has to be associative and commutative - confirm.
 90 |     template< typename Function_T > // Function_T: callable as func( vec, shuffled ) whose result is assigned back to vec
 91 |     float inline operator()( simd_type< float, avx_tag > vec, Function_T func )
 92 |     {
 93 |         vec = func( vec, _mm256_permute2f128_ps( vec, vec, 1 ) ); // pair each 128-bit lane with the other one (control 1 swaps the lanes)
 94 |         vec = func( vec, _mm256_permute_ps( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); // per 128-bit lane: pair floats 0,1 with floats 2,3
 95 |         vec = func( vec, _mm256_permute_ps( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); // per 128-bit lane: pair float 0 with float 1
 96 |         return _mm256_cvtss_f32( vec ); // the lowest float now holds the full result
 97 |     }
 98 | };
 99 | 
100 | template<>
101 | struct intravector_op< double, avx_tag > // Folds the four double elements of one 256-bit register into a single value using a caller-supplied binary operation.
102 | { // NOTE(review): elements are paired in shuffle order, so func presumably has to be associative and commutative - confirm.
103 |     template< typename Function_T > // Function_T: callable as func( vec, shuffled ) whose result is assigned back to vec
104 |     double inline operator()( simd_type< double, avx_tag > vec, Function_T func )
105 |     {
106 |         vec = func( vec, _mm256_permute4x64_pd( vec, _MM_SHUFFLE( 0, 0, 3, 2 ) ) ); // pair doubles 0,1 with doubles 2,3
107 |         vec = func( vec, _mm256_permute4x64_pd( vec, _MM_SHUFFLE( 0, 0, 0, 1 ) ) ); // pair double 0 with double 1
108 |         return _mm256_cvtsd_f64( vec ); // the lowest double now holds the full result
109 |     }
110 | };
111 | 
112 | } // namespace litesimd
113 | 
114 | #endif // LITESIMD_HAS_AVX
115 | #endif // LITESIMD_ARCH_AVX_INTRAVECTOR_H
116 | 


--------------------------------------------------------------------------------
/test/bitwise.cpp:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #include <litesimd/types.h>
 24 | #include <litesimd/bitwise.h>
 25 | #include <litesimd/shuffle.h>
 26 | #include <litesimd/algorithm.h>
 27 | #include <litesimd/helpers/iostream.h>
 28 | #include "gtest/gtest.h"
 29 | 
 30 | namespace ls = litesimd;
 31 | 
 32 | template <typename T> class BitwiseTypedTest: public ::testing::Test {}; // Empty fixture; T is one std::pair< value type, tag > taken from TestTypes.
 33 | // Type list driving the typed tests below (integer element types only): .first_type is the element type, .second_type the tag.
 34 | using TestTypes = ::testing::Types<
 35 | #ifdef __SSE2__ // SSE entries exist only when the compiler defines __SSE2__. NOTE(review): test/intravector.cpp gates on LITESIMD_HAS_SSE instead - confirm which macro is intended.
 36 |     std::pair<int8_t, ls::sse_tag>, std::pair<int16_t, ls::sse_tag>,
 37 |     std::pair<int32_t, ls::sse_tag>, std::pair<int64_t, ls::sse_tag>
 38 | #ifdef __AVX2__ // AVX entries are appended only when __AVX2__ is defined as well (hence the leading comma on the next line).
 39 |     , std::pair<int8_t, ls::avx_tag>, std::pair<int16_t, ls::avx_tag>,
 40 |     std::pair<int32_t, ls::avx_tag>, std::pair<int64_t, ls::avx_tag>
 41 | #endif // __AVX2__
 42 | #endif // __SSE2__ (when it is undefined the type list is empty)
 43 | >;
 44 | TYPED_TEST_CASE(BitwiseTypedTest, TestTypes); // Binds the fixture to the type list above.
 45 | 
 46 | #ifdef __SSE2__
 47 | TYPED_TEST(BitwiseTypedTest, AndTypedTest)
 48 | {
 49 |     using value_t = typename TypeParam::first_type;
 50 |     using tag_t = typename TypeParam::second_type;
 51 |     using vec_t = ls::simd_type< value_t, tag_t >;
 52 | 
 53 |     vec_t lhs( 3 );
 54 |     vec_t rhs( 6 );
 55 |     // 3 & 6 == 2 in every element when calling the named function ...
 56 |     ls::for_each( ls::bit_and< value_t, tag_t >( lhs, rhs ), []( int index, value_t val )
 57 |     {
 58 |         EXPECT_EQ( 2, val ) << "Error on index " << index;
 59 |         return true;
 60 |     } );
 61 |     // ... and when using the operator.
 62 |     ls::for_each( lhs & rhs, []( int index, value_t val )
 63 |     {
 64 |         EXPECT_EQ( 2, val ) << "Error on index " << index;
 65 |         return true;
 66 |     } );
 67 |     // Single-argument form: element 0 becomes 6, the others stay 3, and the expected reduction is 6 & 3 == 2.
 68 |     lhs = ls::set<0, value_t, tag_t>( lhs, 6 );
 69 |     auto reduced = ls::bit_and( lhs );
 70 |     EXPECT_EQ( static_cast<value_t>(6), (ls::get<0, value_t, tag_t>( lhs )) ) << "Simd: " << lhs;
 71 |     EXPECT_EQ( static_cast<value_t>(2), reduced ) << "ret, Simd: " << +reduced << ", " << lhs;
 72 | }
 73 | 
 74 | TYPED_TEST(BitwiseTypedTest, OrTypedTest)
 75 | {
 76 |     using value_t = typename TypeParam::first_type;
 77 |     using tag_t = typename TypeParam::second_type;
 78 |     using vec_t = ls::simd_type< value_t, tag_t >;
 79 | 
 80 |     vec_t lhs( 3 );
 81 |     vec_t rhs( 6 );
 82 |     // 3 | 6 == 7 in every element when calling the named function ...
 83 |     ls::for_each( ls::bit_or< value_t, tag_t >( lhs, rhs ), []( int index, value_t val )
 84 |     {
 85 |         EXPECT_EQ( 7, val ) << "Error on index " << index;
 86 |         return true;
 87 |     } );
 88 |     // ... and when using the operator.
 89 |     ls::for_each( lhs | rhs, []( int index, value_t val )
 90 |     {
 91 |         EXPECT_EQ( 7, val ) << "Error on index " << index;
 92 |         return true;
 93 |     } );
 94 |     // Single-argument form: element 0 becomes 6, the others stay 3, and the expected reduction is 6 | 3 == 7.
 95 |     lhs = ls::set<0, value_t, tag_t>( lhs, 6 );
 96 |     EXPECT_EQ( static_cast<value_t>(7), ls::bit_or( lhs ) );
 97 | }
 98 | 
 99 | TYPED_TEST(BitwiseTypedTest, XorTypedTest)
100 | {
101 |     using value_t = typename TypeParam::first_type;
102 |     using tag_t = typename TypeParam::second_type;
103 |     using vec_t = ls::simd_type< value_t, tag_t >;
104 | 
105 |     vec_t lhs( 3 );
106 |     vec_t rhs( 6 );
107 |     // 3 ^ 6 == 5 in every element when calling the named function ...
108 |     ls::for_each( ls::bit_xor< value_t, tag_t >( lhs, rhs ), []( int index, value_t val )
109 |     {
110 |         EXPECT_EQ( 5, val ) << "Error on index " << index;
111 |         return true;
112 |     } );
113 |     // ... and when using the operator.
114 |     ls::for_each( lhs ^ rhs, []( int index, value_t val )
115 |     {
116 |         EXPECT_EQ( 5, val ) << "Error on index " << index;
117 |         return true;
118 |     } );
119 |     // Single-argument form on ( 6, 3, 3, ... ): the odd number of remaining 3s collapses to 3, so 6 ^ 3 == 5.
120 |     lhs = ls::set<0, value_t, tag_t>( lhs, 6 );
121 |     EXPECT_EQ( static_cast<value_t>(5), ls::bit_xor( lhs ) );
122 | }
123 | 
124 | TYPED_TEST(BitwiseTypedTest, NotTypedTest)
125 | {
126 |     using value_t = typename TypeParam::first_type;
127 |     using tag_t = typename TypeParam::second_type;
128 |     using vec_t = ls::simd_type< value_t, tag_t >;
129 | 
130 |     vec_t input( 3 );
131 |     // Every element must equal ~3 narrowed to the element type when calling the named function ...
132 |     ls::for_each( ls::bit_not< value_t, tag_t >( input ), []( int index, value_t val )
133 |     {
134 |         EXPECT_EQ( static_cast<value_t>( ~3 ), val ) << "Error on index " << index;
135 |         return true;
136 |     } );
137 |     // ... and when using the operator.
138 |     ls::for_each( ~input, []( int index, value_t val )
139 |     {
140 |         EXPECT_EQ( static_cast<value_t>( ~3 ), val ) << "Error on index " << index;
141 |         return true;
142 |     } );
143 | }
144 | 
145 | #endif //__SSE2__
146 | 


--------------------------------------------------------------------------------
/test/intravector.cpp:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #include <litesimd/types.h>
 24 | #include <litesimd/intravector.h>
 25 | #include <litesimd/arithmetic.h>
 26 | #include <litesimd/helpers/iostream.h>
 27 | #include "gtest/gtest.h"
 28 | 
 29 | namespace ls = litesimd;
 30 | 
 31 | template <typename T> class IntravectorTypedTest: public ::testing::Test {}; // Empty fixture; T is one std::pair< value type, tag > taken from TestTypes.
 32 | // Type list driving the typed tests below: .first_type is the element type, .second_type the instruction-set tag.
 33 | using TestTypes = ::testing::Types<
 34 | #ifdef LITESIMD_HAS_SSE // SSE entries exist only when LITESIMD_HAS_SSE is defined (presumably by the litesimd headers - confirm).
 35 |     std::pair<int8_t, ls::sse_tag>, std::pair<int16_t, ls::sse_tag>,
 36 |     std::pair<int32_t, ls::sse_tag>, std::pair<int64_t, ls::sse_tag>,
 37 |     std::pair<float, ls::sse_tag>, std::pair<double, ls::sse_tag>
 38 | #ifdef LITESIMD_HAS_AVX // AVX entries are appended only when LITESIMD_HAS_AVX is defined as well (hence the leading comma on the next line).
 39 |     , std::pair<int8_t, ls::avx_tag>, std::pair<int16_t, ls::avx_tag>,
 40 |     std::pair<int32_t, ls::avx_tag>, std::pair<int64_t, ls::avx_tag>,
 41 |     std::pair<float, ls::avx_tag>, std::pair<double, ls::avx_tag>
 42 | #endif // LITESIMD_HAS_AVX
 43 | #endif // LITESIMD_HAS_SSE (when it is undefined the type list is empty)
 44 | >;
 45 | TYPED_TEST_CASE(IntravectorTypedTest, TestTypes); // Binds the fixture to the type list above.
 46 | 
 47 | #ifdef LITESIMD_HAS_SSE
 48 | TYPED_TEST(IntravectorTypedTest, HorizontalAritmeticTest)
 49 | {
 50 |     using value_t = typename TypeParam::first_type;
 51 |     using tag_t = typename TypeParam::second_type;
 52 |     using vec_t = ls::simd_type< value_t, tag_t >;
 53 |     using binary_fn = vec_t(*)(vec_t, vec_t);
 54 | 
 55 |     vec_t ramp = ls::iota< value_t, tag_t >( 1 );
 56 |     value_t expected = 0; // scalar reference: 1 + 2 + ... + simd_size, accumulated in the element type
 57 |     for( size_t term = 1; term <= vec_t::simd_size; ++term )
 58 |     {
 59 |         expected += static_cast<value_t>( term );
 60 |     }
 61 |     EXPECT_EQ( expected, ls::horizontal( ramp, static_cast< binary_fn >(ls::add< value_t, tag_t >) ) );
 62 | }
 63 | 
 64 | TYPED_TEST(IntravectorTypedTest, HorizontalLambdaTest)
 65 | {
 66 |     using value_t = typename TypeParam::first_type;
 67 |     using tag_t = typename TypeParam::second_type;
 68 |     using vec_t = ls::simd_type< value_t, tag_t >;
 69 |     // Scalar reference: ( 2 + 3 + ... + simd_size + 1 ) - 1, i.e. the plain 1..simd_size sum plus simd_size - 1.
 70 |     vec_t ramp = ls::iota< value_t, tag_t >( 1 );
 71 |     value_t expected = 0;
 72 |     for( size_t term = 2; term <= vec_t::simd_size + 1; ++term )
 73 |     {
 74 |         expected += static_cast<value_t>( term );
 75 |     }
 76 |     --expected;
 77 |     auto add_plus_one = []( vec_t a, vec_t b ) -> vec_t
 78 |     {
 79 |         return ls::add( static_cast< value_t >( 1 ), ls::add( a, b ) );
 80 |     };
 81 |     EXPECT_EQ( expected, ls::horizontal( ramp, add_plus_one ) );
 82 | }
 83 | 
 84 | #if (__GNUC__ >= 5) || defined(__clang__)
 85 | // Not compatible with old GCCs
 86 | TEST(BaseTest, HorizontalIntrincsTest)
 87 | {
 88 |     // int8_t sums a register of ones, because summing iota( 1 ) would overflow the element type.
 89 |     auto sse_i8 = ls::simd_type<  int8_t, ls::sse_tag >( 1 );
 90 |     EXPECT_EQ( 16, ls::horizontal( sse_i8, _mm_add_epi8 ) );
 91 |     // The remaining types sum iota( 1 ); each raw intrinsic is passed directly as the binary operation.
 92 |     auto sse_i16 = ls::iota< int16_t, ls::sse_tag >( 1 );
 93 |     EXPECT_EQ( 36, ls::horizontal( sse_i16, _mm_add_epi16 ) );
 94 |     auto sse_i32 = ls::iota< int32_t, ls::sse_tag >( 1 );
 95 |     EXPECT_EQ( 10, ls::horizontal( sse_i32, _mm_add_epi32 ) );
 96 |     auto sse_i64 = ls::iota< int64_t, ls::sse_tag >( 1 );
 97 |     EXPECT_EQ(  3, ls::horizontal( sse_i64, _mm_add_epi64 ) );
 98 |     auto sse_f32 = ls::iota<   float, ls::sse_tag >( 1 );
 99 |     EXPECT_FLOAT_EQ(  10, ls::horizontal( sse_f32, _mm_add_ps ) );
100 |     auto sse_f64 = ls::iota<  double, ls::sse_tag >( 1 );
101 |     EXPECT_DOUBLE_EQ(  3, ls::horizontal( sse_f64, _mm_add_pd ) );
102 | 
103 | #ifdef LITESIMD_HAS_AVX
104 |     // Same checks on the 256-bit registers; int8_t again sums ones to avoid the overflow.
105 |     auto avx_i8 = ls::simd_type<  int8_t, ls::avx_tag >( 1 );
106 |     EXPECT_EQ(  32, ls::horizontal( avx_i8, _mm256_add_epi8 ) );
107 |     // The remaining types sum iota( 1 ) over twice as many elements as the SSE case.
108 |     auto avx_i16 = ls::iota< int16_t, ls::avx_tag >( 1 );
109 |     EXPECT_EQ( 136, ls::horizontal( avx_i16, _mm256_add_epi16 ) );
110 |     auto avx_i32 = ls::iota< int32_t, ls::avx_tag >( 1 );
111 |     EXPECT_EQ(  36, ls::horizontal( avx_i32, _mm256_add_epi32 ) );
112 |     auto avx_i64 = ls::iota< int64_t, ls::avx_tag >( 1 );
113 |     EXPECT_EQ(  10, ls::horizontal( avx_i64, _mm256_add_epi64 ) );
114 |     auto avx_f32 = ls::iota<   float, ls::avx_tag >( 1 );
115 |     EXPECT_FLOAT_EQ(  36, ls::horizontal( avx_f32, _mm256_add_ps ) );
116 |     auto avx_f64 = ls::iota<  double, ls::avx_tag >( 1 );
117 |     EXPECT_DOUBLE_EQ( 10, ls::horizontal( avx_f64, _mm256_add_pd ) );
118 | #endif // LITESIMD_HAS_AVX
119 | }
120 | #endif // __GNUC__
121 | 
122 | #endif // LITESIMD_HAS_SSE
123 | 


--------------------------------------------------------------------------------
/test/arithmetic.cpp:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #include <litesimd/types.h>
 24 | #include <litesimd/algorithm.h>
 25 | #include <litesimd/arithmetic.h>
 26 | #include "gtest/gtest.h"
 27 | 
 28 | namespace ls = litesimd;
 29 | 
 30 | template <typename T> class ArithmeticTypedTest: public ::testing::Test {}; // Empty fixture; T is one std::pair< value type, tag > taken from TestTypes.
 31 | // Type list for the per-type tests below: .first_type is the element type, .second_type the instruction-set tag.
 32 | using TestTypes = ::testing::Types<
 33 | #ifdef __SSE2__ // SSE entries exist only when the compiler defines __SSE2__. NOTE(review): test/intravector.cpp gates on LITESIMD_HAS_SSE instead - confirm which macro is intended.
 34 |     std::pair<int8_t, ls::sse_tag>, std::pair<int16_t, ls::sse_tag>,
 35 |     std::pair<int32_t, ls::sse_tag>, std::pair<int64_t, ls::sse_tag>,
 36 |     std::pair<float, ls::sse_tag>, std::pair<double, ls::sse_tag>
 37 | #ifdef __AVX2__ // AVX entries are appended only when __AVX2__ is defined as well (hence the leading comma on the next line).
 38 |     , std::pair<int8_t, ls::avx_tag>, std::pair<int16_t, ls::avx_tag>,
 39 |     std::pair<int32_t, ls::avx_tag>, std::pair<int64_t, ls::avx_tag>,
 40 |     std::pair<float, ls::avx_tag>, std::pair<double, ls::avx_tag>
 41 | #endif // __AVX2__
 42 | #endif // __SSE2__ (when it is undefined the type list is empty)
 43 | >;
 44 | TYPED_TEST_CASE(ArithmeticTypedTest, TestTypes); // Binds the fixture to the type list above.
 45 | 
 46 | template <typename T> class ArithmeticTaggedTest: public ::testing::Test {}; // Empty fixture; T is a bare instruction-set tag taken from TagTypes.
 47 | // Tag-only list, used by the tests below that pick their own element types (int16_t / int32_t, float / double).
 48 | using TagTypes = ::testing::Types<
 49 | #ifdef __SSE2__ // the SSE tag is listed only when the compiler defines __SSE2__
 50 | ls::sse_tag
 51 | #ifdef __AVX2__ // the AVX tag is appended only when __AVX2__ is defined as well
 52 | , ls::avx_tag
 53 | #endif // __AVX2__
 54 | #endif // __SSE2__ (when it is undefined the tag list is empty)
 55 | >;
 56 | 
 57 | TYPED_TEST_CASE(ArithmeticTaggedTest, TagTypes); // Binds the fixture to the tag list above.
 58 | 
 59 | #ifdef __SSE2__
 60 | TYPED_TEST(ArithmeticTypedTest, AddTypedTest)
 61 | {
 62 |     using value_t = typename TypeParam::first_type;
 63 |     using tag_t = typename TypeParam::second_type;
 64 |     using vec_t = ls::simd_type< value_t, tag_t >;
 65 | 
 66 |     vec_t ones( static_cast<value_t>(1) );
 67 |     vec_t twos( static_cast<value_t>(2) );
 68 |     // 1 + 2 == 3 in every element when calling the named function ...
 69 |     ls::for_each( ls::add< value_t, tag_t >( ones, twos ), []( int index, value_t val )
 70 |     {
 71 |         EXPECT_EQ( static_cast<value_t>(3), val ) << "Error on index " << index;
 72 |         return true;
 73 |     } );
 74 |     // ... and when using the operator.
 75 |     ls::for_each( ones + twos, []( int index, value_t val )
 76 |     {
 77 |         EXPECT_EQ( static_cast<value_t>(3), val ) << "Error on index " << index;
 78 |         return true;
 79 |     } );
 80 | }
 81 | 
 82 | TYPED_TEST(ArithmeticTypedTest, SubTypedTest)
 83 | {
 84 |     using value_t = typename TypeParam::first_type;
 85 |     using tag_t = typename TypeParam::second_type;
 86 |     using vec_t = ls::simd_type< value_t, tag_t >;
 87 | 
 88 |     vec_t minuend( 8 );
 89 |     vec_t subtrahend( 3 );
 90 |     // 8 - 3 == 5 in every element when calling the named function ...
 91 |     ls::for_each( ls::sub< value_t, tag_t >( minuend, subtrahend ), []( int index, value_t val )
 92 |     {
 93 |         EXPECT_EQ( 5, val ) << "Error on index " << index;
 94 |         return true;
 95 |     } );
 96 |     // ... and when using the operator.
 97 |     ls::for_each( minuend - subtrahend, []( int index, value_t val )
 98 |     {
 99 |         EXPECT_EQ( 5, val ) << "Error on index " << index;
100 |         return true;
101 |     } );
102 | }
103 | 
104 | TYPED_TEST(ArithmeticTaggedTest, MulLoHiTest)
105 | {
106 |     using tag_t = TypeParam;
107 |     using vec16_t = ls::simd_type< int16_t, tag_t >;
108 |     using vec32_t = ls::simd_type< int32_t, tag_t >;
109 | 
110 |     vec16_t wide16( 0x2003 );
111 |     vec16_t factor16( 0x30 );
112 |     // 0x2003 * 0x30 == 0x60090: the expected low 16 bits are 0x0090 ...
113 |     ls::for_each( ls::mullo( wide16, factor16 ), []( int index, int16_t val )
114 |     {
115 |         EXPECT_EQ( 0x90, val ) << "Error on index " << index;
116 |         return true;
117 |     } );
118 |     // ... and the expected high 16 bits are 0x0006.
119 |     ls::for_each( ls::mulhi( wide16, factor16 ), []( int index, int16_t val )
120 |     {
121 |         EXPECT_EQ( 6, val ) << "Error on index " << index;
122 |         return true;
123 |     } );
124 | 
125 |     vec32_t wide32( 0x20000003 );
126 |     vec32_t factor32( 0x30 );
127 |     // 0x20000003 * 0x30 == 0x600000090: the expected low 32 bits are 0x00000090.
128 |     ls::for_each( ls::mullo( wide32, factor32 ), []( int index, int32_t val )
129 |     {
130 |         EXPECT_EQ( 0x90, val ) << "Error on index " << index;
131 |         return true;
132 |     } );
133 | }
134 | 
135 | TYPED_TEST(ArithmeticTaggedTest, DivTest)
136 | {
137 |     using tag_t = TypeParam;
138 |     using vecf_t = ls::simd_type< float, tag_t >;
139 |     using vecd_t = ls::simd_type< double, tag_t >;
140 | 
141 |     vecf_t dividend_f( 20 );
142 |     vecf_t divisor_f( 2 );
143 |     // float: 20 / 2 == 10 in every element.
144 |     ls::for_each( ls::div( dividend_f, divisor_f ), []( int index, float val )
145 |     {
146 |         EXPECT_FLOAT_EQ( 10, val ) << "Error on index " << index;
147 |         return true;
148 |     } );
149 | 
150 |     vecd_t dividend_d( 20 );
151 |     vecd_t divisor_d( 2 );
152 |     // double: 20 / 2 == 10 in every element.
153 |     ls::for_each( ls::div( dividend_d, divisor_d ), []( int index, double val )
154 |     {
155 |         EXPECT_DOUBLE_EQ( 10, val ) << "Error on index " << index;
156 |         return true;
157 |     } );
158 | }
159 | #endif //__SSE2__
160 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/common/bitwise.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_ARCH_COMMON_BITWISE_H
 24 | #define LITESIMD_ARCH_COMMON_BITWISE_H
 25 | 
 26 | #include <litesimd/types.h>
 27 | 
 28 | namespace litesimd {
 29 | 
 30 | // Basic bitwise operations
 31 | // ---------------------------------------------------------------------------------------
 32 | 
 33 | /**
 34 |  * \ingroup bitwise
 35 |  * \brief Returns the bitwise AND operation between passed parameters.
 36 |  * \note Primary template only: it has no generic implementation, so instantiating it is a compile-time error.
 37 |  * \param lhs, rhs SIMD registers to apply bitwise AND operation.
 38 |  * \returns SIMD register with the result of the operation.
 39 |  *
 40 |  * **Example**
 41 |  * ```{.cpp}
 42 |  * #include <iostream>
 43 |  * #include <litesimd/types.h>
 44 |  * #include <litesimd/bitwise.h>
 45 |  * #include <litesimd/helpers/iostream.h>
 46 |  *
 47 |  * int main()
 48 |  * {
 49 |  *     namespace ls = litesimd;
 50 |  *
 51 |  *     ls::t_int32_simd a( 3 ), b( 6 );
 52 |  *     std::cout << "bit_and( a, b ): " << ls::bit_and( a, b ) << std::endl;
 53 |  *     return 0;
 54 |  * }
 55 |  * ```
 56 |  * Output on a SSE compilation
 57 |  * ```
 58 |  * bit_and( a, b ): (2, 2, 2, 2)
 59 |  * ```
 60 |  */
 61 | template< typename ValueType_T, typename Tag_T = default_tag >
 62 | inline simd_type< ValueType_T, Tag_T > bit_and( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs )
 63 | { static_assert( sizeof( ValueType_T ) == 0, "litesimd::bit_and has no implementation for this value type and tag" ); } // dependent condition: fires only on instantiation, instead of flowing off the end of a non-void function
 64 | 
 65 | /**
 66 |  * \ingroup bitwise
 67 |  * \brief Returns the bitwise OR operation between passed parameters.
 68 |  * \note Primary template only: it has no generic implementation, so instantiating it is a compile-time error.
 69 |  * \param lhs, rhs SIMD registers to apply bitwise OR operation.
 70 |  * \returns SIMD register with the result of the operation.
 71 |  *
 72 |  * **Example**
 73 |  * ```{.cpp}
 74 |  * #include <iostream>
 75 |  * #include <litesimd/types.h>
 76 |  * #include <litesimd/bitwise.h>
 77 |  * #include <litesimd/helpers/iostream.h>
 78 |  *
 79 |  * int main()
 80 |  * {
 81 |  *     namespace ls = litesimd;
 82 |  *
 83 |  *     ls::t_int32_simd a( 3 ), b( 6 );
 84 |  *     std::cout << "bit_or( a, b ): " << ls::bit_or( a, b ) << std::endl;
 85 |  *     return 0;
 86 |  * }
 87 |  * ```
 88 |  * Output on a SSE compilation
 89 |  * ```
 90 |  * bit_or( a, b ): (7, 7, 7, 7)
 91 |  * ```
 92 |  */
 93 | template< typename ValueType_T, typename Tag_T = default_tag >
 94 | inline simd_type< ValueType_T, Tag_T > bit_or( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs )
 95 | { static_assert( sizeof( ValueType_T ) == 0, "litesimd::bit_or has no implementation for this value type and tag" ); } // dependent condition: fires only on instantiation, instead of flowing off the end of a non-void function
 96 | 
 97 | /**
 98 |  * \ingroup bitwise
 99 |  * \brief Returns the bitwise XOR operation between passed parameters.
100 |  * \note Primary template only: it has no generic implementation, so instantiating it is a compile-time error.
101 |  * \param lhs, rhs SIMD registers to apply bitwise XOR operation.
102 |  * \returns SIMD register with the result of the operation.
103 |  *
104 |  * **Example**
105 |  * ```{.cpp}
106 |  * #include <iostream>
107 |  * #include <litesimd/types.h>
108 |  * #include <litesimd/bitwise.h>
109 |  * #include <litesimd/helpers/iostream.h>
110 |  *
111 |  * int main()
112 |  * {
113 |  *     namespace ls = litesimd;
114 |  *
115 |  *     ls::t_int32_simd a( 3 ), b( 6 );
116 |  *     std::cout << "bit_xor( a, b ): " << ls::bit_xor( a, b ) << std::endl;
117 |  *     return 0;
118 |  * }
119 |  * ```
120 |  * Output on a SSE compilation
121 |  * ```
122 |  * bit_xor( a, b ): (5, 5, 5, 5)
123 |  * ```
124 |  */
125 | template< typename ValueType_T, typename Tag_T = default_tag >
126 | inline simd_type< ValueType_T, Tag_T > bit_xor( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs )
127 | { static_assert( sizeof( ValueType_T ) == 0, "litesimd::bit_xor has no implementation for this value type and tag" ); } // dependent condition: fires only on instantiation, instead of flowing off the end of a non-void function
128 | 
129 | /**
130 |  * \ingroup bitwise
131 |  * \brief Inverts all bits on a SIMD register.
132 |  *
133 |  * \param vec SIMD register to negate.
134 |  * \returns SIMD register with all bits inverted.
135 |  *
136 |  * **Example**
137 |  * ```{.cpp}
138 |  * #include <iostream>
139 |  * #include <litesimd/types.h>
140 |  * #include <litesimd/bitwise.h>
141 |  * #include <litesimd/helpers/iostream.h>
142 |  *
143 |  * int main()
144 |  * {
145 |  *     namespace ls = litesimd;
146 |  *
147 |  *     ls::t_int32_simd a( 0xfffffff0 );
148 |  *     std::cout << "bit_not( a ): " << ls::bit_not( a ) << std::endl;
149 |  *     return 0;
150 |  * }
151 |  * ```
152 |  * Output on a SSE compilation
153 |  * ```
154 |  * bit_not( a ): (15, 15, 15, 15)
155 |  * ```
156 |  */
157 | template< typename ValueType_T, typename Tag_T = default_tag >
158 | inline simd_type< ValueType_T, Tag_T >
159 | bit_not( simd_type< ValueType_T, Tag_T > vec )
160 | {
161 |     return bit_xor( vec, simd_type< ValueType_T, Tag_T >::ones() ); // XOR with an all-ones register flips every bit
162 | }
163 | 
164 | } // namespace litesimd
165 | 
166 | #endif // LITESIMD_ARCH_COMMON_BITWISE_H
167 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/common/compare.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_ARCH_COMMON_COMPARE_H
 24 | #define LITESIMD_ARCH_COMMON_COMPARE_H
 25 | 
 26 | #include <utility>
 27 | #include <litesimd/types.h>
 28 | 
 29 | namespace litesimd {
 30 | 
 31 | // Bit scan -- primary templates. They ignore the bitmask and always report
 32 | // "no bit found" as (-1, false); the sse_tag specializations perform the real scan.
 33 | template< typename Tag_T = default_tag >
 34 | std::pair<int, bool> bit_scan_forward( uint32_t bitmask ){ return std::pair<int, bool>( -1, false ); }
 35 | 
 36 | template< typename Tag_T = default_tag >
 37 | std::pair<int, bool> bit_scan_reverse( uint32_t bitmask ){ return std::pair<int, bool>( -1, false ); }
 38 | 
 39 | /**
 40 |  * \ingroup compare
 41 |  * \brief Converts a SIMD mask to a bitmask
 42 |  *
 43 |  * \param mask SIMD mask to be converted
 44 |  * \tparam ValueType_T Base type of original SIMD register
 45 |  * \returns Bitmask equivalent
 46 |  *
 47 |  * **Example**
 48 |  * ```{.cpp}
 49 |  * #include <iostream>
 50 |  * #include <litesimd/types.h>
 51 |  * #include <litesimd/compare.h>
 52 |  *
 53 |  * int main()
 54 |  * {
 55 |  *     namespace ls = litesimd;
 56 |  *
 57 |  *     ls::t_int32_simd x( 9, 8, 7, 6 );
 58 |  *     ls::t_int32_simd y( 9, 8, 5, 6 );
 59 |  *     auto mask = ls::equal_to( x, y ); // (0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF)
 60 |  *     std::cout << "mask_to_bitmask< int32_t >( mask ): " << std::hex
 61 |  *               << ls::mask_to_bitmask< int32_t >( mask ) << std::endl;
 62 |  *     return 0;
 63 |  * }
 64 |  * ```
 65 |  * Output on a SSE compilation
 66 |  * ```
 67 |  * mask_to_bitmask< int32_t >( mask ): ff0f
 68 |  * ```
 69 |  */
 70 | template< typename ValueType_T, typename Tag_T = default_tag >
 71 | inline typename simd_type< ValueType_T, Tag_T >::bitmask_type
 72 | mask_to_bitmask( simd_type< ValueType_T, Tag_T > mask )
 73 | {
 74 |     return 0; // Primary template: ignores the mask and reports an empty bitmask
 75 | }
 76 | 
 77 | /**
 78 |  * \ingroup compare
 79 |  * \brief Compares two SIMD registers and returns a mask marking where the values of the first parameter are greater than those of the second parameter
 80 |  *
 81 |  * \param lhs, rhs Values to be compared
 82 |  * \tparam ValueType_T Base type of SIMD register
 83 |  * \returns Mask representing the values of the lhs parameter which are greater than the rhs parameter
 84 |  *
 85 |  * **Example**
 86 |  * ```{.cpp}
 87 |  * #include <iostream>
 88 |  * #include <litesimd/types.h>
 89 |  * #include <litesimd/compare.h>
 90 |  *
 91 |  * int main()
 92 |  * {
 93 |  *     namespace ls = litesimd;
 94 |  *
 95 |  *     ls::t_int32_simd x( 9, 8, 7, 6 );
 96 |  *     ls::t_int32_simd y( 9, 8, 5, 6 );
 97 |  *     std::cout << "greater( x, y ): " << std::hex << ls::greater( x, y ) << std::endl;
 98 |  *     return 0;
 99 |  * }
100 |  * ```
101 |  * Output on a SSE compilation
102 |  * ```
103 |  * greater( x, y ): (0, 0, ffffffff, 0)
104 |  * ```
105 |  */
106 | template< typename ValueType_T, typename Tag_T = default_tag >
107 | inline simd_type< ValueType_T, Tag_T >
108 | greater( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){} // Primary template: empty body; arch/sse/compare.h specializes it for sse_tag
109 | 
110 | /**
111 |  * \ingroup compare
112 |  * \brief Compares two SIMD registers and returns a mask of equal values
113 |  *
114 |  * \param lhs, rhs Values to be compared
115 |  * \tparam ValueType_T Base type of SIMD register
116 |  * \returns Mask representing the values of the lhs parameter which are equal to the rhs parameter
117 |  *
118 |  * **Example**
119 |  * ```{.cpp}
120 |  * #include <iostream>
121 |  * #include <litesimd/types.h>
122 |  * #include <litesimd/compare.h>
123 |  *
124 |  * int main()
125 |  * {
126 |  *     namespace ls = litesimd;
127 |  *
128 |  *     ls::t_int32_simd x( 9, 8, 7, 6 );
129 |  *     ls::t_int32_simd y( 9, 8, 5, 6 );
130 |  *     std::cout << "equal_to( x, y ): " << std::hex << ls::equal_to( x, y ) << std::endl;
131 |  *     return 0;
132 |  * }
133 |  * ```
134 |  * Output on a SSE compilation
135 |  * ```
136 |  * equal_to( x, y ): (ffffffff, ffffffff, 0, ffffffff)
137 |  * ```
138 |  */
139 | template< typename ValueType_T, typename Tag_T = default_tag >
140 | inline simd_type< ValueType_T, Tag_T >
141 | equal_to( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){} // Primary template: empty body; arch/sse/compare.h specializes it for sse_tag
142 | 
143 | template< typename ValueType_T, typename Tag_T = default_tag >
144 | struct all_of_op{}; // Primary template (empty); arch/sse/compare.h partially specializes it for sse_tag
145 | 
146 | template< typename ValueType_T, typename Tag_T = default_tag >
147 | struct all_of_bitmask_op{}; // Primary template (empty); arch/sse/compare.h partially specializes it for sse_tag
148 | 
149 | template< typename ValueType_T, typename Tag_T = default_tag >
150 | struct none_of_op{}; // Primary template (empty). NOTE(review): presumably specialized per tag like all_of_op -- confirm
151 | 
152 | } // namespace litesimd
153 | 
154 | #endif // LITESIMD_ARCH_COMMON_COMPARE_H
155 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/sse/traits.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_ARCH_SSE_TRAITS_H
 24 | #define LITESIMD_ARCH_SSE_TRAITS_H
 25 | 
 26 | #ifdef LITESIMD_HAS_SSE
 27 | 
 28 | #include <smmintrin.h>
 29 | #include <litesimd/detail/arch/common/traits.h>
 30 | 
 31 | namespace litesimd {
 32 | 
 33 | template<> struct traits<  int8_t, sse_tag >{ // SSE traits for int8_t: 16 lanes in a __m128i
 34 |     using simd_type = __m128i;
 35 |     using bitmask_type = uint32_t;
 36 |     static inline simd_type zero() { return _mm_setzero_si128(); } // every bit cleared
 37 |     static inline simd_type ones() { return _mm_cmpeq_epi8( zero(), zero() ); } // 0 == 0 holds in every byte, so every bit is set
 38 |     static inline simd_type from_value( int8_t v ) { return _mm_set1_epi8( v ); } // broadcast v to all lanes
 39 |     static inline simd_type from_values( int8_t v15, int8_t v14, int8_t v13, int8_t v12, // highest lane first (v15) down to lane 0 (v0)
 40 |                                          int8_t v11, int8_t v10, int8_t  v9, int8_t  v8,
 41 |                                          int8_t  v7, int8_t  v6, int8_t  v5, int8_t  v4,
 42 |                                          int8_t  v3, int8_t  v2, int8_t  v1, int8_t  v0 )
 43 |     {
 44 |         return _mm_set_epi8( v15, v14, v13, v12, v11, v10,  v9,  v8,
 45 |                               v7,  v6,  v5,  v4,  v3,  v2,  v1,  v0 );
 46 |     }
 47 |     static inline simd_type iota() { return from_values( 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ); } // lane i holds the value i
 48 | };
 49 | 
 50 | template<> struct traits<  int16_t, sse_tag >{ // SSE traits for int16_t: 8 lanes in a __m128i
 51 |     using simd_type = __m128i;
 52 |     using bitmask_type = uint32_t;
 53 |     static inline simd_type zero() { return _mm_setzero_si128(); } // every bit cleared
 54 |     static inline simd_type ones() { return _mm_cmpeq_epi8( zero(), zero() ); } // 0 == 0 holds in every byte, so every bit is set
 55 |     static inline simd_type from_value( int16_t v ) { return _mm_set1_epi16( v ); } // broadcast v to all lanes
 56 |     static inline simd_type from_values( int16_t v7, int16_t v6, int16_t v5, int16_t v4, // highest lane first (v7) down to lane 0 (v0)
 57 |                                          int16_t v3, int16_t v2, int16_t v1, int16_t v0 )
 58 |     {
 59 |         return _mm_set_epi16( v7, v6, v5, v4, v3, v2, v1, v0 );
 60 |     }
 61 |     static inline simd_type iota() { return from_values( 7, 6, 5, 4, 3, 2, 1, 0 ); } // lane i holds the value i
 62 | };
 63 | 
 64 | template<> struct traits<  int32_t, sse_tag >{ // SSE traits for int32_t: 4 lanes in a __m128i
 65 |     using simd_type = __m128i;
 66 |     using bitmask_type = uint32_t;
 67 |     static inline simd_type zero() { return _mm_setzero_si128(); } // every bit cleared
 68 |     static inline simd_type ones() { return _mm_cmpeq_epi8( zero(), zero() ); } // 0 == 0 holds in every byte, so every bit is set
 69 |     static inline simd_type from_value( int32_t v ) { return _mm_set1_epi32( v ); } // broadcast v to all lanes
 70 |     static inline simd_type from_values( int32_t v3, int32_t v2, int32_t v1, int32_t v0 ) // highest lane first (v3) down to lane 0 (v0)
 71 |     {
 72 |         return _mm_set_epi32( v3, v2, v1, v0 );
 73 |     }
 74 |     static inline simd_type iota() { return from_values( 3, 2, 1, 0 ); } // lane i holds the value i
 75 | };
 76 | 
 77 | template<> struct traits<  int64_t, sse_tag >{ // SSE traits for int64_t: 2 lanes in a __m128i
 78 |     using simd_type = __m128i;
 79 |     using bitmask_type = uint32_t;
 80 |     static inline simd_type zero() { return _mm_setzero_si128(); } // every bit cleared
 81 |     static inline simd_type ones() { return _mm_cmpeq_epi8( zero(), zero() ); } // 0 == 0 holds in every byte, so every bit is set
 82 |     static inline simd_type from_value( int64_t v ) { return _mm_set1_epi64x( v ); } // broadcast v to both lanes
 83 |     static inline simd_type from_values( int64_t v1, int64_t v0 ) // high lane (v1) first, then lane 0 (v0)
 84 |     {
 85 |         return _mm_set_epi64x( v1, v0 );
 86 |     }
 87 |     static inline simd_type iota() { return from_values( 1, 0 ); } // lane i holds the value i
 88 | };
 89 | 
 90 | template<> struct traits<   float, sse_tag >{ // SSE traits for float: 4 lanes in a __m128
 91 |     using simd_type = __m128;
 92 |     using bitmask_type = uint32_t;
 93 |     static inline simd_type zero() { return _mm_setzero_ps(); } // every bit cleared
 94 |     static inline simd_type ones() { return _mm_cmpeq_ps( zero(), zero() ); } // 0.0f == 0.0f holds in every lane, so every bit is set
 95 |     static inline simd_type from_value( float v ) { return _mm_set1_ps( v ); } // broadcast v to all lanes
 96 |     static inline simd_type from_values( float v3, float v2, float v1, float v0 ) // highest lane first (v3) down to lane 0 (v0)
 97 |     {
 98 |         return _mm_set_ps( v3, v2, v1, v0 );
 99 |     }
100 |     static inline simd_type iota() { return from_values( 3, 2, 1, 0 ); } // lane i holds the value i
101 | };
102 | 
103 | template<> struct traits<  double, sse_tag >{ // SSE traits for double: 2 lanes in a __m128d
104 |     using simd_type = __m128d;
105 |     using bitmask_type = uint32_t;
106 |     static inline simd_type zero() { return _mm_setzero_pd(); } // every bit cleared
107 |     static inline simd_type ones() { return _mm_cmpeq_pd( zero(), zero() ); } // 0.0 == 0.0 holds in every lane, so every bit is set
108 |     static inline simd_type from_value( double v ) { return _mm_set1_pd( v ); } // broadcast v to both lanes
109 |     static inline simd_type from_values( double v1, double v0 ) // high lane (v1) first, then lane 0 (v0)
110 |     {
111 |         return _mm_set_pd( v1, v0 );
112 |     }
113 |     static inline simd_type iota() { return from_values( 1, 0 ); } // lane i holds the value i
114 | };
115 | 
116 | } // namespace litesimd
117 | 
118 | #endif // LITESIMD_HAS_SSE
119 | #endif // LITESIMD_ARCH_SSE_TRAITS_H
120 | 


--------------------------------------------------------------------------------
/include/litesimd/types.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_TYPES_H
 24 | #define LITESIMD_TYPES_H
 25 | 
 26 | #include <cstdint>
 27 | #include <litesimd/detail/arch/traits.h>
 28 | 
 29 | namespace litesimd {
 30 | 
 31 | /**
 32 |  * \defgroup type SIMD type
 33 |  *
 34 |  * This defines the basic litesimd type.
 35 |  *
 36 |  * All these functions are accessible via `<litesimd/types.h>`
 37 |  */
 38 | 
 39 | /**
 40 |  * \ingroup type
 41 |  * \class simd_type
 42 |  * \brief Basic litesimd type
 43 |  *
 44 |  * The simd_type is transparently interoperable with SIMD intrinsics to
 45 |  * allow the developer to perform more complex operations than originally
 46 |  * anticipated by the library.
 47 |  *
 48 |  * \tparam ValueType_T Base type of original SIMD register
 49 |  */
 50 | template< typename ValueType_T, typename Tag_T = default_tag >
 51 | class simd_type
 52 | {
 53 |     using simd_traits = traits< ValueType_T, Tag_T >;
 54 | 
 55 | public:
 56 |     /// Base type of original SIMD register
 57 |     using simd_value_type = ValueType_T;
 58 |     /// SIMD instruction TAG selector
 59 |     using simd_tag = Tag_T;
 60 | 
 61 |     /// Type of this class
 62 |     using type = simd_type< simd_value_type, simd_tag >;
 63 |     /// Underlying intrinsic register type
 64 |     using inner_type = typename simd_traits::simd_type;
 65 |     /// Type of bitmasks
 66 |     using bitmask_type = typename simd_traits::bitmask_type;
 67 |     /// Type of index
 68 |     using index_type = int;
 69 |     /// How many values fit in one SIMD register
 70 |     constexpr static size_t simd_size = sizeof(inner_type) / sizeof(simd_value_type);
 71 | 
 72 |     simd_type(){} // Default constructor: the wrapped register is left uninitialized
 73 | 
 74 |     /// Implicit constructor from the intrinsic SIMD type
 75 |     simd_type( inner_type v ) : v_(v) {}
 76 | 
 77 |     /// Implicit cast to the intrinsic SIMD type
 78 |     operator inner_type() const { return v_; }
 79 | 
 80 |     /**
 81 |      * \brief Construct a simd_type with all values equal
 82 |      *
 83 |      * \param v Value to be set
 84 |      *
 85 |      * **Example**
 86 |      * ```{.cpp}
 87 |      * #include <iostream>
 88 |      * #include <litesimd/types.h>
 89 |      * #include <litesimd/helpers/iostream.h>
 90 |      *
 91 |      * int main()
 92 |      * {
 93 |      *     namespace ls = litesimd;
 94 |      *
 95 |      *     ls::t_int32_simd x( 3 );
 96 |      *     std::cout << "x: " << x << std::endl;
 97 |      *     return 0;
 98 |      * }
 99 |      * ```
100 |      * Output on a SSE compilation
101 |      * ```
102 |      * x: (3, 3, 3, 3)
103 |      * ```
104 |      */
105 |     explicit simd_type( simd_value_type v ) : v_( simd_traits::from_value( v ) ) {}
106 | 
107 |     /**
108 |      * \brief Construct a simd_type setting all values
109 |      *
110 |      * \param i, j, v Values to be set
111 |      *
112 |      * This constructor takes a variable number of parameters and forwards
113 |      * them all to the traits' from_values(), which expects one value per lane.
114 |      *
115 |      * **Example**
116 |      * ```{.cpp}
117 |      * #include <iostream>
118 |      * #include <litesimd/types.h>
119 |      * #include <litesimd/helpers/iostream.h>
120 |      *
121 |      * int main()
122 |      * {
123 |      *     namespace ls = litesimd;
124 |      *
125 |      *     ls::t_int32_simd x( 3, 2, 1, 0 );
126 |      *     std::cout << "x: " << x << std::endl;
127 |      *     return 0;
128 |      * }
129 |      * ```
130 |      * Output on a SSE compilation
131 |      * ```
132 |      * x: (3, 2, 1, 0)
133 |      * ```
134 |      */
135 |     template< typename... Value_T >
136 |     simd_type( simd_value_type i, simd_value_type j, Value_T... v ) :
137 |         v_( simd_traits::from_values( i, j, v... ) ) {}
138 | 
139 |     /// Returns a simd_type with all values zero
140 |     static inline simd_type zero() { return simd_type( simd_traits::zero() ); }
141 | 
142 |     /// Returns a simd_type with all bits 1
143 |     static inline simd_type ones() { return simd_type( simd_traits::ones() ); }
144 | 
145 |     /// Returns a simd_type with increasing values from 0 to simd_size -1, eg. (3, 2, 1, 0).
146 |     static inline simd_type iota() { return simd_type( simd_traits::iota() ); }
147 | 
148 | private:
149 |     inner_type v_; // the wrapped intrinsic register
150 | };
151 | 
152 | /// Shortcut for int8_t simd_type on default instruction set
153 | using  t_int8_simd = simd_type<  int8_t, default_tag >;
154 | /// Shortcut for int16_t simd_type on default instruction set
155 | using t_int16_simd = simd_type< int16_t, default_tag >;
156 | /// Shortcut for int32_t simd_type on default instruction set
157 | using t_int32_simd = simd_type< int32_t, default_tag >;
158 | /// Shortcut for int64_t simd_type on default instruction set
159 | using t_int64_simd = simd_type< int64_t, default_tag >;
160 | 
161 | /// Shortcut for float simd_type on default instruction set
162 | using t_float_simd  = simd_type<  float, default_tag >;
163 | /// Shortcut for double simd_type on default instruction set
164 | using t_double_simd = simd_type< double, default_tag >;
165 | 
166 | } // namespace litesimd
167 | 
168 | #endif // LITESIMD_TYPES_H
169 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/avx/traits.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_ARCH_AVX_TRAITS_H
 24 | #define LITESIMD_ARCH_AVX_TRAITS_H
 25 | 
 26 | #ifdef LITESIMD_HAS_AVX
 27 | 
 28 | #include <immintrin.h>
 29 | #include <litesimd/detail/arch/common/traits.h>
 30 | 
 31 | namespace litesimd {
 32 | 
 33 | template<> struct traits<  int8_t, avx_tag >{ // AVX traits for int8_t: 32 lanes in a __m256i
 34 |     using simd_type = __m256i;
 35 |     using bitmask_type = uint32_t;
 36 |     static inline simd_type zero() { return _mm256_setzero_si256(); } // every bit cleared
 37 |     static inline simd_type ones() { return _mm256_cmpeq_epi8( zero(), zero() ); } // 0 == 0 holds in every byte, so every bit is set
 38 |     static inline simd_type from_value( int8_t v ) { return _mm256_set1_epi8( v ); } // broadcast v to all lanes
 39 |     static inline simd_type from_values( int8_t v31, int8_t v30, int8_t v29, int8_t v28, // highest lane first (v31) down to lane 0 (v0)
 40 |                                          int8_t v27, int8_t v26, int8_t v25, int8_t v24,
 41 |                                          int8_t v23, int8_t v22, int8_t v21, int8_t v20,
 42 |                                          int8_t v19, int8_t v18, int8_t v17, int8_t v16,
 43 |                                          int8_t v15, int8_t v14, int8_t v13, int8_t v12,
 44 |                                          int8_t v11, int8_t v10, int8_t  v9, int8_t  v8,
 45 |                                          int8_t  v7, int8_t  v6, int8_t  v5, int8_t  v4,
 46 |                                          int8_t  v3, int8_t  v2, int8_t  v1, int8_t  v0 )
 47 |     {
 48 |         return _mm256_set_epi8( v31, v30, v29, v28, v27, v26, v25, v24,
 49 |                                 v23, v22, v21, v20, v19, v18, v17, v16,
 50 |                                 v15, v14, v13, v12, v11, v10,  v9,  v8,
 51 |                                  v7,  v6,  v5,  v4,  v3,  v2,  v1,  v0 );
 52 |     }
 53 |     static inline simd_type iota() { return from_values( 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16,
 54 |                                                          15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ); } // lane i holds the value i
 55 | };
 56 | 
 57 | template<> struct traits<  int16_t, avx_tag >{ // AVX traits for int16_t: 16 lanes in a __m256i
 58 |     using simd_type = __m256i;
 59 |     using bitmask_type = uint32_t;
 60 |     static inline simd_type zero() { return _mm256_setzero_si256(); } // every bit cleared
 61 |     static inline simd_type ones() { return _mm256_cmpeq_epi8( zero(), zero() ); } // 0 == 0 holds in every byte, so every bit is set
 62 |     static inline simd_type from_value( int16_t v ) { return _mm256_set1_epi16( v ); } // broadcast v to all lanes
 63 |     static inline simd_type from_values( int16_t v15, int16_t v14, int16_t v13, int16_t v12, // highest lane first (v15) down to lane 0 (v0)
 64 |                                          int16_t v11, int16_t v10, int16_t  v9, int16_t  v8,
 65 |                                          int16_t  v7, int16_t  v6, int16_t  v5, int16_t  v4,
 66 |                                          int16_t  v3, int16_t  v2, int16_t  v1, int16_t  v0 )
 67 |     {
 68 |         return _mm256_set_epi16( v15, v14, v13, v12, v11, v10,  v9,  v8,
 69 |                                   v7,  v6,  v5,  v4,  v3,  v2,  v1,  v0 );
 70 |     }
 71 |     static inline simd_type iota() { return from_values( 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ); } // lane i holds the value i
 72 | };
 73 | 
 74 | template<> struct traits<  int32_t, avx_tag >{ // AVX traits for int32_t: 8 lanes in a __m256i
 75 |     using simd_type = __m256i;
 76 |     using bitmask_type = uint32_t;
 77 |     static inline simd_type zero() { return _mm256_setzero_si256(); } // every bit cleared
 78 |     static inline simd_type ones() { return _mm256_cmpeq_epi8( zero(), zero() ); } // 0 == 0 holds in every byte, so every bit is set
 79 |     static inline simd_type from_value( int32_t v ) { return _mm256_set1_epi32( v ); } // broadcast v to all lanes
 80 |     static inline simd_type from_values( int32_t v7, int32_t v6, int32_t v5, int32_t v4, // highest lane first (v7) down to lane 0 (v0)
 81 |                                          int32_t v3, int32_t v2, int32_t v1, int32_t v0 )
 82 |     {
 83 |         return _mm256_set_epi32( v7, v6, v5, v4, v3, v2, v1, v0 );
 84 |     }
 85 |     static inline simd_type iota() { return from_values( 7, 6, 5, 4, 3, 2, 1, 0 ); } // lane i holds the value i
 86 | };
 87 | 
 88 | template<> struct traits<  int64_t, avx_tag >{ // AVX traits for int64_t: 4 lanes in a __m256i
 89 |     using simd_type = __m256i;
 90 |     using bitmask_type = uint32_t;
 91 |     static inline simd_type zero() { return _mm256_setzero_si256(); } // every bit cleared
 92 |     static inline simd_type ones() { return _mm256_cmpeq_epi8( zero(), zero() ); } // 0 == 0 holds in every byte, so every bit is set
 93 |     static inline simd_type from_value( int64_t v ) { return _mm256_set1_epi64x( v ); } // broadcast v to all lanes
 94 |     static inline simd_type from_values( int64_t v3, int64_t v2, int64_t v1, int64_t v0 ) // highest lane first (v3) down to lane 0 (v0)
 95 |     {
 96 |         return _mm256_set_epi64x( v3, v2, v1, v0 );
 97 |     }
 98 |     static inline simd_type iota() { return from_values( 3, 2, 1, 0 ); } // lane i holds the value i
 99 | };
100 | 
101 | template<> struct traits<   float, avx_tag >{ // AVX traits for float: 8 lanes in a __m256
102 |     using simd_type = __m256;
103 |     using bitmask_type = uint32_t;
104 |     static inline simd_type zero() { return _mm256_setzero_ps(); } // every bit cleared
105 |     static inline simd_type ones() { return _mm256_cmp_ps( zero(), zero(), _CMP_EQ_OQ ); } // 0.0f == 0.0f holds in every lane, so every bit is set (a greater-than compare would yield all zeros)
106 |     static inline simd_type from_value( float v ) { return _mm256_set1_ps( v ); } // broadcast v to all lanes
107 |     static inline simd_type from_values( float v7, float v6, float v5, float v4, // highest lane first (v7) down to lane 0 (v0)
108 |                                          float v3, float v2, float v1, float v0 )
109 |     {
110 |         return _mm256_set_ps( v7, v6, v5, v4, v3, v2, v1, v0 );
111 |     }
112 |     static inline simd_type iota() { return from_values( 7, 6, 5, 4, 3, 2, 1, 0 ); } // lane i holds the value i
113 | };
114 | 
115 | template<> struct traits<  double, avx_tag >{ // AVX traits for double: 4 lanes in a __m256d
116 |     using simd_type = __m256d;
117 |     using bitmask_type = uint32_t;
118 |     static inline simd_type zero() { return _mm256_setzero_pd(); } // every bit cleared
119 |     static inline simd_type ones() { return _mm256_cmp_pd( zero(), zero(), _CMP_EQ_OQ ); } // 0.0 == 0.0 holds in every lane, so every bit is set (a greater-than compare would yield all zeros)
120 |     static inline simd_type from_value( double v ) { return _mm256_set1_pd( v ); } // broadcast v to all lanes
121 |     static inline simd_type from_values( double v3, double v2, double v1, double v0 ) // highest lane first (v3) down to lane 0 (v0)
122 |     {
123 |         return _mm256_set_pd( v3, v2, v1, v0 );
124 |     }
125 |     static inline simd_type iota() { return from_values( 3, 2, 1, 0 ); } // lane i holds the value i
126 | };
127 | 
128 | } // namespace litesimd
129 | 
130 | #endif // LITESIMD_HAS_AVX
131 | #endif // LITESIMD_ARCH_AVX_TRAITS_H
132 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/sse/compare.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_ARCH_SSE_COMPARE_H
 24 | #define LITESIMD_ARCH_SSE_COMPARE_H
 25 | 
 26 | #ifdef LITESIMD_HAS_SSE
 27 | 
 28 | #include <litesimd/types.h>
 29 | #include <litesimd/detail/arch/common/compare.h>
 30 | #include <litesimd/detail/helper_macros.h>
 31 | 
 32 | namespace litesimd {
 33 | 
 34 | 
 35 | // Bit scan: (index of the first set bit found, true), or (-1, false) when bitmask == 0.
 36 | // Forward searches from the least significant bit, reverse from the most significant bit.
 37 | template<> inline std::pair<int, bool>
 38 | bit_scan_forward< sse_tag >( uint32_t bitmask )
 39 | {
 40 | #ifdef _WIN32
 41 |     unsigned long index;
 42 |     return (0 == _BitScanForward( &index, bitmask )) // returns 0 when no bit is set
 43 |         ? std::make_pair( -1, false )
 44 |         : std::make_pair( static_cast<int>( index ), true ); // cast so both arms are pair<int, bool>
 45 | #else
 46 |     return (bitmask == 0)
 47 |         ? std::make_pair( -1, false )
 48 |         : std::make_pair( _bit_scan_forward( bitmask ), true );
 49 | #endif
 50 | }
 51 | 
 52 | template<> inline std::pair<int, bool> // (index of the highest set bit, true), or (-1, false) when bitmask == 0
 53 | bit_scan_reverse< sse_tag >( uint32_t bitmask )
 54 | {
 55 | #ifdef _WIN32
 56 |     unsigned long index;
 57 |     return (0 == _BitScanReverse( &index, bitmask )) // returns 0 when no bit is set
 58 |         ? std::make_pair( -1, false )
 59 |         : std::make_pair( static_cast<int>( index ), true ); // cast so both arms are pair<int, bool>
 60 | #else
 61 |     return (bitmask == 0)
 62 |         ? std::make_pair( -1, false )
 63 |         : std::make_pair( _bit_scan_reverse( bitmask ), true );
 64 | #endif
 65 | }
 66 | 
 67 | // Mask to bitmask: sse_tag specializations that forward to the matching movemask intrinsic.
 68 | // All integer types use the per-byte _mm_movemask_epi8; float and double use their own movemask.
 69 | #define DEF_MASK_TO_BITMASK( TYPE_T, CMD ) \
 70 | template<> inline typename simd_type< TYPE_T, sse_tag >::bitmask_type \
 71 | mask_to_bitmask< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > mask ) { \
 72 |     return CMD( mask ); \
 73 | }
 74 | DEF_MASK_TO_BITMASK( int8_t,  _mm_movemask_epi8 )
 75 | DEF_MASK_TO_BITMASK( int16_t, _mm_movemask_epi8 )
 76 | DEF_MASK_TO_BITMASK( int32_t, _mm_movemask_epi8 )
 77 | DEF_MASK_TO_BITMASK( int64_t, _mm_movemask_epi8 )
 78 | DEF_MASK_TO_BITMASK( float,   _mm_movemask_ps )
 79 | DEF_MASK_TO_BITMASK( double,  _mm_movemask_pd )
 80 | #undef DEF_MASK_TO_BITMASK
 81 | 
 82 | // Greater than: sse_tag specializations forwarding lhs and rhs to the cmpgt intrinsic of each type.
 83 | // ---------------------------------------------------------------------------------------
 84 | #define DEF_GREATER_THAN( TYPE_T, CMD ) \
 85 | template<> inline simd_type< TYPE_T, sse_tag > \
 86 | greater< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \
 87 |                                  simd_type< TYPE_T, sse_tag > rhs ) { \
 88 |     return CMD( lhs, rhs ); \
 89 | }
 90 | 
 91 | DEF_GREATER_THAN( int8_t,  _mm_cmpgt_epi8 )
 92 | DEF_GREATER_THAN( int16_t, _mm_cmpgt_epi16 )
 93 | DEF_GREATER_THAN( int32_t, _mm_cmpgt_epi32 )
 94 | DEF_GREATER_THAN( int64_t, _mm_cmpgt_epi64 ) // _mm_cmpgt_epi64 is an SSE4.2 instruction
 95 | DEF_GREATER_THAN( float,   _mm_cmpgt_ps )
 96 | DEF_GREATER_THAN( double,  _mm_cmpgt_pd )
 97 | #undef DEF_GREATER_THAN
 98 | 
 99 | // Equals: sse_tag specializations forwarding lhs and rhs to the cmpeq intrinsic of each type.
100 | // ---------------------------------------------------------------------------------------
101 | #define DEF_EQUALS( TYPE_T, CMD ) \
102 | template<> inline simd_type< TYPE_T, sse_tag > \
103 | equal_to< TYPE_T, sse_tag >( simd_type< TYPE_T, sse_tag > lhs, \
104 |                            simd_type< TYPE_T, sse_tag > rhs ) { \
105 |     return CMD( lhs, rhs ); \
106 | }
107 | 
108 | DEF_EQUALS( int8_t,  _mm_cmpeq_epi8 )
109 | DEF_EQUALS( int16_t, _mm_cmpeq_epi16 )
110 | DEF_EQUALS( int32_t, _mm_cmpeq_epi32 )
111 | DEF_EQUALS( int64_t, _mm_cmpeq_epi64 )
112 | DEF_EQUALS( float,   _mm_cmpeq_ps )
113 | DEF_EQUALS( double,  _mm_cmpeq_pd )
114 | #undef DEF_EQUALS
115 | 
116 | // all_of
117 | // ---------------------------------------------------------------------------------------
118 | template< typename ValueType_T >
119 | struct all_of_bitmask_op< ValueType_T,
120 |               typename std::enable_if<std::is_integral<ValueType_T>::value, sse_tag>::type >
121 | {
122 |     inline bool operator()( typename simd_type< ValueType_T, sse_tag >::bitmask_type bitmask )
123 |     {
124 |         return (bitmask == 0xffff);
125 |     }
126 | };
127 | 
128 | template< typename ValueType_T >
129 | struct all_of_bitmask_op< ValueType_T,
130 |               typename std::enable_if<std::is_floating_point<ValueType_T>::value, sse_tag>::type >
131 | {
132 |     inline bool operator()( typename simd_type< ValueType_T, sse_tag >::bitmask_type bitmask )
133 |     {
134 |         constexpr static typename simd_type< ValueType_T, sse_tag >::bitmask_type true_mask =
135 |             (1 << simd_type< ValueType_T, sse_tag >::simd_size) -1;
136 |         return (bitmask == true_mask);
137 |     }
138 | };
139 | 
140 | template< typename ValueType_T >
141 | struct all_of_op< ValueType_T,
142 |               typename std::enable_if<std::is_integral<ValueType_T>::value, sse_tag>::type >
143 | {
144 |     inline bool operator()( simd_type< ValueType_T, sse_tag > mask )
145 |     {
146 |         return !!_mm_test_all_ones( mask );
147 |     }
148 | };
149 | 
150 | template< typename ValueType_T >
151 | struct all_of_op< ValueType_T,
152 |               typename std::enable_if<std::is_floating_point<ValueType_T>::value, sse_tag>::type >
153 | {
154 |     inline bool operator()( simd_type< ValueType_T, sse_tag > mask )
155 |     {
156 |         return all_of_bitmask_op< ValueType_T, sse_tag >()( mask_to_bitmask< ValueType_T, sse_tag >( mask ) );
157 |     }
158 | };
159 | 
160 | // none_of
161 | // ---------------------------------------------------------------------------------------
162 | template< typename ValueType_T >
163 | struct none_of_op< ValueType_T,
164 |               typename std::enable_if<std::is_integral<ValueType_T>::value, sse_tag>::type >
165 | {
166 |     inline bool operator()( simd_type< ValueType_T, sse_tag > mask )
167 |     {
168 |         return !!_mm_testz_si128( mask, simd_type< ValueType_T, sse_tag >::ones() );
169 |     }
170 | };
171 | 
172 | template< typename ValueType_T >
173 | struct none_of_op< ValueType_T,
174 |               typename std::enable_if<std::is_floating_point<ValueType_T>::value, sse_tag>::type >
175 | {
176 |     inline bool operator()( simd_type< ValueType_T, sse_tag > mask )
177 |     {
178 |         return (0 == mask_to_bitmask< ValueType_T, sse_tag >( mask ) );
179 |     }
180 | };
181 | 
182 | } // namespace litesimd
183 | 
184 | #endif // LITESIMD_HAS_SSE
185 | #endif // LITESIMD_ARCH_SSE_COMPARE_H
186 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | [![Build Status](https://travis-ci.org/andrelrt/litesimd.svg?branch=master)](https://travis-ci.org/andrelrt/litesimd) [![Build status](https://ci.appveyor.com/api/projects/status/t3fmylykanoma9ja/branch/master?svg=true)](https://ci.appveyor.com/project/andrelrt/litesimd/branch/master) [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT) [![Documentation](https://codedocs.xyz/andrelrt/litesimd.svg)](https://codedocs.xyz/andrelrt/litesimd/)
  2 | 
  3 | Litesimd is a no-overhead, header-only C++ library for SIMD processing. The goal of this library is to provide tools for developers to incorporate SIMD processing in all kinds of algorithms, not only calculations. To achieve this goal, some design principles are observed.
  4 | 
  5 | ## Design principles
  6 | 
  7 | #### SIMD for all kind of algorithms
  8 | 
  9 | Typically SIMD is used for mathematical algorithms, such as linear algebra, FFT or image processing. However, the processor has several SIMD instructions for all purposes. The litesimd library emphasizes SIMD comparison and data shuffling instead of math operations.
 10 | 
 11 | #### Not conceal the SIMD complexity
 12 | 
 13 | By design, the library does not attempt to hide the complexity of using SIMD. When using any SIMD library, the developer needs to understand what is happening anyway. So why try to hide this complexity from them? Some C++ operators are (or will be) overloaded for convenience, but this is not the library's focus.
 14 | 
 15 | #### Intrinsics interoperability
 16 | 
 17 | Any SIMD library typically covers a smaller scope than the total set of processor SIMD instructions. The litesimd library must be transparently interoperable with SIMD intrinsics, allowing the developer to perform more complex operations than originally anticipated by the library.
 18 | 
 19 | #### No memory access
 20 | 
 21 | Processor instructions are much faster than memory access. Although memory access is unavoidable, the litesimd library prefers processor instructions to manipulate or enumerate values within the SIMD registers to minimize memory use.
 22 | 
 23 | ## Example
 24 | 
 25 | ```cpp
 26 | // Compiled with
 27 | //
 28 | // g++ -std=c++11 -O3 -mavx2 -I<path/to/litesimd/include> greater.cpp -o greater
 29 | 
 30 | #include <iostream>
 31 | #include <litesimd/compare.h>
 32 | #include <litesimd/helpers/iostream.h>
 33 | 
 34 | int main()
 35 | {
 36 |     namespace ls = litesimd;
 37 | 
 38 |     // int32_t vector in default instruction set (AVX)
 39 |     ls::t_int32_simd cmp;
 40 | 
 41 |     // litesimd types are intrinsics compatible
 42 |     cmp = _mm256_set_epi32( 80, 70, 60, 50, 40, 30, 20, 10 );
 43 | 
 44 |     int32_t val = 5;
 45 | 
 46 |     // int32_simd_size is how many int32_t values fit in a t_int32_simd (8)
 47 |     for( size_t i = 0; i <= ls::t_int32_simd::simd_size; ++i )
 48 |     {
 49 |         // Compare 'val' against all 'cmp' values
 50 |         uint32_t bitmask = ls::greater_bitmask( val, cmp );
 51 | 
 52 |         // As 'cmp' is sorted, we can use the bitmask to find the
 53 |         // last item which 'val' is greater
 54 |         //
 55 |         // Returns values between [-1, ls::int32_simd_size)
 56 |         int index = ls::bitmask_last_index< int32_t >( bitmask );
 57 | 
 58 |         // greater_last_index could be called instead
 59 |         // greater_bitmask + bitmask_last_index
 60 |         //
 61 |         // int index = ls::greater_last_index( val, cmp );
 62 | 
 63 |         if( index < 0 )
 64 |         {
 65 |             std::cout << "The value " << val
 66 |                       << " is less than all values of " << cmp
 67 |                       << std::endl;
 68 |         }
 69 |         else if( index == ls::int32_simd_size -1 )
 70 |         {
 71 |             std::cout << "The value " << val
 72 |                       << " is greater than all values of " << cmp
 73 |                       << std::endl;
 74 |         }
 75 |         else
 76 |         {
 77 |             std::cout << "The value " << val
 78 |                       << " is between items " << index
 79 |                       << " and " << index + 1
 80 |                       << " of " << cmp
 81 |                       << std::endl;
 82 |         }
 83 | 
 84 |         val += 10;
 85 |     }
 86 |     return 0;
 87 | }
 88 | ```
 89 | This will produce the following output:
 90 | 
 91 | ```
 92 | $ ./greater
 93 | The value 5 is less than all values of (80, 70, 60, 50, 40, 30, 20, 10)
 94 | The value 15 is between items 0 and 1 of (80, 70, 60, 50, 40, 30, 20, 10)
 95 | The value 25 is between items 1 and 2 of (80, 70, 60, 50, 40, 30, 20, 10)
 96 | The value 35 is between items 2 and 3 of (80, 70, 60, 50, 40, 30, 20, 10)
 97 | The value 45 is between items 3 and 4 of (80, 70, 60, 50, 40, 30, 20, 10)
 98 | The value 55 is between items 4 and 5 of (80, 70, 60, 50, 40, 30, 20, 10)
 99 | The value 65 is between items 5 and 6 of (80, 70, 60, 50, 40, 30, 20, 10)
100 | The value 75 is between items 6 and 7 of (80, 70, 60, 50, 40, 30, 20, 10)
101 | The value 85 is greater than all values of (80, 70, 60, 50, 40, 30, 20, 10)
102 | ```
103 | 
104 | ## Directory structure
105 | 
106 | ```
107 | litesimd/
108 |     doc/                        ; Doxygen project
109 |     include/litesimd/
110 |         algorithm/
111 |             for_each.h          ; for_each item of simd_type, also for_each index of bitmask
112 |             iota.h              ; Fill vector with [0, simd_size), e.g. (3, 2, 1, 0)
113 |             minmax.h            ; Min and max functions
114 |         detail/                 ; Internal functions, classes and architecture dependent code. Should not be included directly
115 |         helpers/
116 |             containers.h        ; Aligned std containers, depends on boost::align
117 |             iostream.h          ; operator<< overload for litesimd types
118 |         algorithm.h             ; Includes all algorithms
119 |         arithmetic.h            ; add, sub, mul, mullo, mulhi, div functions
120 |         bitwise.h               ; bit_and, bit_or, bit_xor and bit_not functions
121 |         compare.h               ; greater, equal_to, mask_to_bitmask, bitmask_to_high/low_index
122 |         intravector.h           ; generic horizontal reduction
123 |         shuffle.h               ; high/low_insert, blend, get/set<>
124 |         types.h                 ; simd_type
125 |     samples/
126 |         binary_search/          ; Benchmark lower_bound implementations
127 |         boyer_moore_horspool/   ; Substring search using SIMD (WIP: still slower than boost, but faster than std::string::find)
128 |         bubble_sort/            ; Classic bubble sort in SIMD style
129 |         greater/                ; Simple greater than sample (the same as above)
130 |         nway_tree/              ; Another approach for same lower_bound search, using trees
131 |         to_lower/               ; ASCII to_lower benchmark
132 |     test/                       ; Unit tests
133 | ```
134 | 
135 | ## Building
136 | 
137 | As a header only library, the building process is only for samples, test and documentation.
138 | 
139 | ```
140 | $ mkdir build
141 | $ cd build
142 | $ cmake -DCMAKE_BUILD_TYPE=Release <path/to/litesimd>
143 | $ make
144 | $ make install
145 | ```
146 | 
147 | Boost libraries are required to build the samples.
148 | 
149 | ## Support
150 | 
151 | Litesimd is tested on the following environments:
152 | 
153 | - Linux
154 |   - GCC: 4.8, 4.9, 5, 6, 7
155 |   - clang: 4, 5, 6
156 | - MacOSX
157 |   - XCode: 8.3, 9, 9.1, 9.2, 9.3, 9.4
158 | - Windows (WIP)
159 |   - MSVC: VS 2015, VS 2017
160 | 
161 | Litesimd supports the following instruction sets:
162 | 
163 | - x86
164 |   - SSE4.2, AVX2
165 | 


--------------------------------------------------------------------------------
/include/litesimd/shuffle.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_SHUFFLE_H
 24 | #define LITESIMD_SHUFFLE_H
 25 | 
 26 | #include <litesimd/types.h>
 27 | #include <litesimd/detail/arch/shuffle.h>
 28 | #include <litesimd/detail/helper_macros.h>
 29 | 
 30 | namespace litesimd {
 31 | 
 32 | /**
 33 |  * \defgroup shuffle Register manipulation
 34 |  *
 35 |  * In litesimd, the shuffle group has functions to manipulate the SIMD register.
 36 |  *
 37 |  * All these functions are accessible at `<litesimd/shuffle.h>`
 38 |  */
 39 | 
 40 | /**
 41 |  * \ingroup shuffle
 42 |  * \brief Shift all values one index lower inside the SIMD register and
 43 |  *        insert a value on the highest index.
 44 |  *
 45 |  * \param vec SIMD register
 46 |  * \param val Value to be inserted
 47 |  * \tparam SimdType_T Type of the original SIMD register
 48 |  * \returns SIMD register with the highest index modified
 49 |  *
 50 |  * **Example**
 51 |  * ```{.cpp}
 52 |  * #include <iostream>
 53 |  * #include <litesimd/types.h>
 54 |  * #include <litesimd/shuffle.h>
 55 |  * #include <litesimd/helpers/iostream.h>
 56 |  *
 57 |  * int main()
 58 |  * {
 59 |  *     namespace ls = litesimd;
 60 |  *
 61 |  *     ls::t_int32_simd x( 3, 2, 1, 0 );
 62 |  *     std::cout << "high_insert( x, 9 ): "
 63 |  *               << ls::high_insert( x, 9 ) << std::endl;
 64 |  *     return 0;
 65 |  * }
 66 |  * ```
 67 |  * Output on a SSE compilation
 68 |  * ```
 69 |  * high_insert( x, 9 ): (9, 3, 2, 1)
 70 |  * ```
 71 |  */
 72 | template< typename SimdType_T, typename SimdType_T::simd_value_type* = nullptr >
 73 | inline SimdType_T
 74 | high_insert( SimdType_T vec, typename SimdType_T::simd_value_type val )
 75 | {
 76 |     return high_insert< typename SimdType_T::simd_value_type,
 77 |                         typename SimdType_T::simd_tag >( vec, val );
 78 | }
 79 | 
 80 | /**
 81 |  * \ingroup shuffle
 82 |  * \brief Shift all values one index higher inside the SIMD register and
 83 |  *        insert a value on the lowest index.
 84 |  *
 85 |  * \param vec SIMD register
 86 |  * \param val Value to be inserted
 87 |  * \tparam SimdType_T Type of the original SIMD register
 88 |  * \returns SIMD register with the lowest index modified
 89 |  *
 90 |  * **Example**
 91 |  * ```{.cpp}
 92 |  * #include <iostream>
 93 |  * #include <litesimd/types.h>
 94 |  * #include <litesimd/shuffle.h>
 95 |  * #include <litesimd/helpers/iostream.h>
 96 |  *
 97 |  * int main()
 98 |  * {
 99 |  *     namespace ls = litesimd;
100 |  *
101 |  *     ls::t_int32_simd x( 3, 2, 1, 0 );
102 |  *     std::cout << "low_insert( x, 9 ): "
103 |  *               << ls::low_insert( x, 9 ) << std::endl;
104 |  *     return 0;
105 |  * }
106 |  * ```
107 |  * Output on a SSE compilation
108 |  * ```
109 |  * low_insert( x, 9 ): (2, 1, 0, 9)
110 |  * ```
111 |  */
template< typename SimdType_T, typename SimdType_T::simd_value_type* = nullptr >
inline SimdType_T
low_insert( SimdType_T vec, typename SimdType_T::simd_value_type val )
{
    // Recover the element type and tag from the register type, then forward
    // to the low_insert overload selected by those two template arguments.
    using value_t = typename SimdType_T::simd_value_type;
    using tag_t   = typename SimdType_T::simd_tag;

    return low_insert< value_t, tag_t >( vec, val );
}
119 | 
120 | // Blend helper functions (comments on arch/common)
121 | // ---------------------------------------------------------------------------------------
122 | template< typename ValueType_T, typename Tag_T = default_tag >
123 | inline simd_type< ValueType_T, Tag_T >
124 | blend( simd_type< ValueType_T, Tag_T > mask,
125 |        ValueType_T trueVal,
126 |        simd_type< ValueType_T, Tag_T > falseVal )
127 | {
128 |     return blend< ValueType_T, Tag_T >( mask,
129 |                                         simd_type< ValueType_T, Tag_T >( trueVal ),
130 |                                         falseVal );
131 | }
132 | 
133 | template< typename ValueType_T, typename Tag_T = default_tag >
134 | inline simd_type< ValueType_T, Tag_T >
135 | blend( simd_type< ValueType_T, Tag_T > mask,
136 |        simd_type< ValueType_T, Tag_T > trueVal,
137 |        ValueType_T falseVal )
138 | {
139 |     return blend< ValueType_T, Tag_T >( mask,
140 |                                         trueVal,
141 |                                         simd_type< ValueType_T, Tag_T >( falseVal ) );
142 | }
143 | 
144 | /**
145 |  * \ingroup shuffle
146 |  * \brief Extract one value from SIMD register.
147 |  *
148 |  * The `index` template parameter is verified with `static_assert` and the compilation will
149 |  * fail with <b>`"Index out of bounds"`</b> error on invalid values.
150 |  *
151 |  * \param vec SIMD register
152 |  * \tparam index Index of the value inside the SIMD register
153 |  * \tparam ValueType_T Base type of original SIMD register
154 |  * \returns The value extracted
155 |  *
156 |  * **Example**
157 |  * ```{.cpp}
158 |  * #include <iostream>
159 |  * #include <litesimd/types.h>
160 |  * #include <litesimd/shuffle.h>
161 |  *
162 |  * int main()
163 |  * {
164 |  *     namespace ls = litesimd;
165 |  *
166 |  *     ls::t_int32_simd x( 9, 7, 5, 2 );
167 |  *     std::cout << "get<2>( x ): "
168 |  *               << ls::get<2>( x ) << std::endl;
169 |  *     return 0;
170 |  * }
171 |  * ```
172 |  * Output on a SSE compilation
173 |  * ```
174 |  * get<2>( x ): 7
175 |  * ```
176 |  */
177 | template< int index, typename ValueType_T, typename Tag_T = default_tag >
178 | inline ValueType_T
179 | get( simd_type< ValueType_T, Tag_T > vec )
180 | {
181 |     static_assert( 0 <= index && index < simd_type< ValueType_T, Tag_T >::simd_size,
182 |                     "Index out of bounds" );
183 | 
184 |     return get_functor< index, ValueType_T, Tag_T >()( vec );
185 | }
186 | 
187 | /**
188 |  * \ingroup shuffle
189 |  * \brief Set one value inside the SIMD register.
190 |  *
191 |  * The `index` template parameter is verified with `static_assert` and the compilation will
192 |  * fail with <b>`"Index out of bounds"`</b> error on invalid values.
193 |  *
194 |  * \param vec SIMD register
195 |  * \param val The value to be set
196 |  * \tparam index Index of the value inside the SIMD register
197 |  * \tparam ValueType_T Base type of original SIMD register
198 |  * \returns SIMD register with the value at `index` replaced by `val`
199 |  *
200 |  * **Example**
201 |  * ```{.cpp}
202 |  * #include <iostream>
203 |  * #include <litesimd/types.h>
204 |  * #include <litesimd/shuffle.h>
205 |  * #include <litesimd/helpers/iostream.h>
206 |  *
207 |  * int main()
208 |  * {
209 |  *     namespace ls = litesimd;
210 |  *
211 |  *     ls::t_int32_simd x( 9, 7, 5, 2 );
212 |  *     std::cout << "set<2>( x, 11 ): "
213 |  *               << ls::set<2>( x, 11 ) << std::endl;
214 |  *     return 0;
215 |  * }
216 |  * ```
217 |  * Output on a SSE compilation
218 |  * ```
219 |  * set<2>( x, 11 ): (9, 11, 5, 2)
220 |  * ```
221 |  */
222 | template< int index, typename ValueType_T, typename Tag_T = default_tag >
223 | inline simd_type< ValueType_T, Tag_T >
224 | set( simd_type< ValueType_T, Tag_T > vec, ValueType_T val )
225 | {
226 |     static_assert( 0 <= index && index < simd_type< ValueType_T, Tag_T >::simd_size,
227 |                     "Index out of bounds" );
228 | 
229 |     return set_functor< index, ValueType_T, Tag_T >()( vec, val );
230 | }
231 | 
232 | } // namespace litesimd
233 | 
234 | #endif // LITESIMD_SHUFFLE_H
235 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/avx/compare.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_ARCH_AVX_COMPARE_H
 24 | #define LITESIMD_ARCH_AVX_COMPARE_H
 25 | 
 26 | #ifdef LITESIMD_HAS_AVX
 27 | 
 28 | #include <litesimd/types.h>
 29 | #include <litesimd/detail/arch/common/compare.h>
 30 | #include <litesimd/detail/arch/sse/compare.h>
 31 | #include <litesimd/detail/arch/avx/bitwise.h>
 32 | 
 33 | namespace litesimd {
 34 | 
 35 | // Bit scan
 36 | // ---------------------------------------------------------------------------------------
 37 | template<> inline std::pair<int, bool>
 38 | bit_scan_forward< avx_tag >( uint32_t bitmask )
 39 | {
 40 |     return bit_scan_forward< sse_tag >( bitmask );
 41 | }
 42 | 
 43 | template<> inline std::pair<int, bool>
 44 | bit_scan_reverse< avx_tag >( uint32_t bitmask )
 45 | {
 46 |     return bit_scan_reverse< sse_tag >( bitmask );
 47 | }
 48 | 
 49 | // Mask to bitmask
 50 | // ---------------------------------------------------------------------------------------
 51 | #define DEF_MASK_TO_BITMASK( TYPE_T, CMD ) \
 52 | template<> inline typename simd_type< TYPE_T, avx_tag >::bitmask_type \
 53 | mask_to_bitmask< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > mask ) { \
 54 |     return CMD( mask ); \
 55 | }
 56 | 
 57 | DEF_MASK_TO_BITMASK( int8_t,  _mm256_movemask_epi8 )
 58 | DEF_MASK_TO_BITMASK( int16_t, _mm256_movemask_epi8 )
 59 | DEF_MASK_TO_BITMASK( int32_t, _mm256_movemask_epi8 )
 60 | DEF_MASK_TO_BITMASK( int64_t, _mm256_movemask_epi8 )
 61 | DEF_MASK_TO_BITMASK( float,   _mm256_movemask_ps )
 62 | DEF_MASK_TO_BITMASK( double,  _mm256_movemask_pd )
 63 | 
 64 | #undef DEF_MASK_TO_BITMASK
 65 | 
 66 | // Greater than
 67 | // ---------------------------------------------------------------------------------------
 68 | #define DEF_GREATER_THAN( TYPE_T, CMD ) \
 69 | template<> inline simd_type< TYPE_T, avx_tag > \
 70 | greater< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \
 71 |                                  simd_type< TYPE_T, avx_tag > rhs ) { \
 72 |     return CMD( lhs, rhs ); \
 73 | }
 74 | 
 75 | DEF_GREATER_THAN( int8_t,  _mm256_cmpgt_epi8 )
 76 | DEF_GREATER_THAN( int16_t, _mm256_cmpgt_epi16 )
 77 | DEF_GREATER_THAN( int32_t, _mm256_cmpgt_epi32 )
 78 | DEF_GREATER_THAN( int64_t, _mm256_cmpgt_epi64 )
 79 | 
 80 | #undef DEF_GREATER_THAN
 81 | 
 82 | template<> inline simd_type< float, avx_tag >
 83 | greater< float, avx_tag >( simd_type< float, avx_tag > lhs,
 84 |                                 simd_type< float, avx_tag > rhs )
 85 | {
 86 |     // Quietly ignore NaN
 87 |     return _mm256_cmp_ps( lhs, rhs, _CMP_GT_OQ );
 88 | }
 89 | 
 90 | template<> inline simd_type< double, avx_tag >
 91 | greater< double, avx_tag >( simd_type< double, avx_tag > lhs,
 92 |                                  simd_type< double, avx_tag > rhs )
 93 | {
 94 |     // Quietly ignore NaN
 95 |     return _mm256_cmp_pd( lhs, rhs, _CMP_GT_OQ );
 96 | }
 97 | 
 98 | // Equals
 99 | // ---------------------------------------------------------------------------------------
100 | #define DEF_EQUALS( TYPE_T, CMD ) \
101 | template<> inline simd_type< TYPE_T, avx_tag > \
102 | equal_to< TYPE_T, avx_tag >( simd_type< TYPE_T, avx_tag > lhs, \
103 |                            simd_type< TYPE_T, avx_tag > rhs ) { \
104 |     return CMD( lhs, rhs ); \
105 | }
106 | 
107 | DEF_EQUALS( int8_t,  _mm256_cmpeq_epi8 )
108 | DEF_EQUALS( int16_t, _mm256_cmpeq_epi16 )
109 | DEF_EQUALS( int32_t, _mm256_cmpeq_epi32 )
110 | DEF_EQUALS( int64_t, _mm256_cmpeq_epi64 )
111 | 
112 | #undef DEF_EQUALS
113 | 
114 | template<> inline simd_type< float, avx_tag >
115 | equal_to< float, avx_tag >( simd_type< float, avx_tag > lhs,
116 |                           simd_type< float, avx_tag > rhs )
117 | {
118 |     // Quietly ignore NaN
119 |     return _mm256_cmp_ps( lhs, rhs, _CMP_EQ_OQ );
120 | }
121 | 
122 | template<> inline simd_type< double, avx_tag >
123 | equal_to< double, avx_tag >( simd_type< double, avx_tag > lhs,
124 |                            simd_type< double, avx_tag > rhs )
125 | {
126 |     // Quietly ignore NaN
127 |     return _mm256_cmp_pd( lhs, rhs, _CMP_EQ_OQ );
128 | }
129 | 
130 | // none_of
131 | // ---------------------------------------------------------------------------------------
132 | template< typename ValueType_T >
133 | struct none_of_op< ValueType_T,
134 |               typename std::enable_if<std::is_integral<ValueType_T>::value, avx_tag>::type >
135 | {
136 |     inline bool operator()( simd_type< ValueType_T, avx_tag > mask )
137 |     {
138 |         return !!_mm256_testz_si256( mask, simd_type< ValueType_T, avx_tag >::ones() );
139 |     }
140 | };
141 | 
142 | template<> struct none_of_op< float, avx_tag >
143 | {
144 |     inline bool operator()( simd_type< float, avx_tag > mask )
145 |     {
146 |         __m256i imask = reinterpret_cast<__m256i>( static_cast<__m256>( mask ) );
147 |         return none_of_op< int32_t, avx_tag >()( imask );
148 |     }
149 | };
150 | 
151 | template<> struct none_of_op< double, avx_tag >
152 | {
153 |     inline bool operator()( simd_type< double, avx_tag > mask )
154 |     {
155 |         __m256i imask = reinterpret_cast<__m256i>( static_cast<__m256d>( mask ) );
156 |         return none_of_op< int64_t, avx_tag >()( imask );
157 |     }
158 | };
159 | 
160 | // all_of
161 | // ---------------------------------------------------------------------------------------
162 | template< typename ValueType_T >
163 | struct all_of_op< ValueType_T,
164 |               typename std::enable_if<std::is_integral<ValueType_T>::value, avx_tag>::type >
165 | {
166 |     inline bool operator()( simd_type< ValueType_T, avx_tag > mask )
167 |     {
168 |         return none_of_op< ValueType_T, avx_tag >()( bit_not( mask ) );
169 |     }
170 | };
171 | 
172 | template<> struct all_of_op< float, avx_tag >
173 | {
174 |     inline bool operator()( simd_type< float, avx_tag > mask )
175 |     {
176 |         __m256i imask = reinterpret_cast<__m256i>( static_cast<__m256>( mask ) );
177 |         return all_of_op< int32_t, avx_tag >()( imask );
178 |     }
179 | };
180 | 
181 | template<> struct all_of_op< double, avx_tag >
182 | {
183 |     inline bool operator()( simd_type< double, avx_tag > mask )
184 |     {
185 |         __m256i imask = reinterpret_cast<__m256i>( static_cast<__m256d>( mask ) );
186 |         return all_of_op< int64_t, avx_tag >()( imask );
187 |     }
188 | };
189 | 
190 | template< typename ValueType_T >
191 | struct all_of_bitmask_op< ValueType_T,
192 |               typename std::enable_if<std::is_integral<ValueType_T>::value, avx_tag>::type >
193 | {
194 |     inline bool operator()( typename simd_type< ValueType_T, avx_tag >::bitmask_type bitmask )
195 |     {
196 |         return (bitmask == 0xffffffff);
197 |     }
198 | };
199 | 
200 | template< typename ValueType_T >
201 | struct all_of_bitmask_op< ValueType_T,
202 |               typename std::enable_if<std::is_floating_point<ValueType_T>::value, avx_tag>::type >
203 | {
204 |     inline bool operator()( typename simd_type< ValueType_T, avx_tag >::bitmask_type bitmask )
205 |     {
206 |         constexpr static typename simd_type< ValueType_T, avx_tag >::bitmask_type true_mask =
207 |             (1 << simd_type< ValueType_T, avx_tag >::simd_size) -1;
208 |         return (bitmask == true_mask);
209 |     }
210 | };
211 | 
212 | } // namespace litesimd
213 | 
214 | #endif // LITESIMD_HAS_AVX
215 | #endif // LITESIMD_ARCH_AVX_COMPARE_H
216 | 


--------------------------------------------------------------------------------
/samples/nway_tree/nway_tree.cpp:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #include <iostream>
 24 | #include <iomanip>
 25 | #include <algorithm>
 26 | #include <numeric>
 27 | #include <map>
 28 | #include <boost/timer/timer.hpp>
 29 | 
 30 | #include <litesimd/types.h>
 31 | #include <litesimd/compare.h>
 32 | #include <litesimd/helpers/containers.h>
 33 | 
 34 | bool g_verbose = true;
 35 | namespace ls = litesimd;
 36 | 
 37 | template< class Cont_T, typename TAG_T >
 38 | struct container_only
 39 | {
 40 | 	using container_type = Cont_T;
 41 |     using value_type     = typename container_type::value_type;
 42 |     using const_iterator = typename container_type::const_iterator;
 43 | 
 44 |     container_only( const container_type& ref ) : ref_( ref ){}
 45 | 
 46 |     void build_index(){}
 47 | 
 48 |     const_iterator find( const value_type& key )
 49 |     {
 50 |         auto first = std::lower_bound( ref_.begin(), ref_.end(), key );
 51 |         return (first!=ref_.end() && !(key<*first)) ? first : ref_.end();
 52 |     }
 53 | private:
 54 |     const container_type& ref_;
 55 | };
 56 | 
 57 | // Lookup through a std::map that associates each stored value with an iterator to it.
 58 | template< class Cont_T, typename TAG_T >
 59 | struct map_index
 60 | {
 61 |     using container_type = Cont_T;
 62 |     using value_type     = typename container_type::value_type;
 63 |     using const_iterator = typename container_type::const_iterator;
 64 |     map_index( const container_type& ref ) : ref_( ref ){}
 65 | 
 66 |     // Registers every element; a repeated value ends up mapped to its last occurrence.
 67 |     void build_index()
 68 |     {
 69 |         for( auto cursor = ref_.begin(), last = ref_.end(); cursor != last; ++cursor )
 70 |             index_[ *cursor ] = cursor;
 71 |     }
 72 | 
 73 |     // Returns the iterator recorded for key, or the container's end() when key is unknown.
 74 |     const_iterator find( const value_type& key )
 75 |     {
 76 |         const auto hit = index_.find( key );
 77 |         if( hit == index_.end() ) return ref_.end();
 78 |         return hit->second;
 79 |     }
 80 | private:
 81 |     const container_type& ref_;
 82 |     std::map< value_type, const_iterator > index_;
 83 | };
 84 | 
 85 | // Implicit multi-level search index over a sorted container (the container is only referenced).
 86 | // Each level keeps the last key of every array_size-wide block of the level below it, the
 87 | // container itself being the lowest one; find() does one SIMD comparison per level.
 88 | template< class Cont_T, typename TAG_T >
 89 | class nway_tree
 90 | {
 91 | public:
 92 |     using container_type = Cont_T;
 93 |     using value_type     = typename container_type::value_type;
 94 |     using const_iterator = typename container_type::const_iterator;
 95 | 
 96 |     nway_tree( const container_type& ref ) : ref_( ref ){}
 97 |     void build_index() { build_index( ref_ ); } // creates every separator level from ref_
 98 | 
 99 |     // Returns an iterator to an element equal to key, or end() when key is absent.
100 |     const_iterator find( const value_type& key ) const
101 |     {
102 |         // After a level, idx counts that level's separators smaller than key, i.e. the block to
103 |         // inspect one level down (li is presumably -1 when no lane is smaller: block start).
104 |         size_t idx = 0;
105 |         for( auto&& level : tree_ )
106 |         {
107 |             int li = ls::greater_last_index< value_type, TAG_T >( key, *level.get_simd( idx ) );
108 |             idx = idx * array_size + li + 1;
109 |         }
110 |         const size_t base = idx * array_size;
111 |         if( base + array_size > ref_.size() ) // a full SIMD load would run past the container
112 |             return find_in_tail( base, key );
113 |         const simd_type* cmp = reinterpret_cast< const simd_type* >( &ref_[ base ] );
114 |         int off = ls::equal_to_last_index< value_type, TAG_T >( key, *cmp );
115 |         if( off < 0 ) return ref_.end();
116 |         auto it = ref_.begin();
117 |         std::advance( it, base + off );
118 |         return it;
119 |     }
120 | 
121 | private:
122 |     constexpr static size_t array_size = ls::simd_type< value_type, TAG_T >::simd_size;
123 |     using simd_type = ls::simd_type< value_type, TAG_T >;
124 |     struct tree_level
125 |     {
126 |         ls::vector< value_type > keys_;
127 |         const simd_type* get_simd( size_t idx ) const
128 |         {
129 |             return reinterpret_cast< const simd_type* >( &keys_[ idx * array_size ] );
130 |         }
131 |         // Pads with max() up to and including block keys_.size() / array_size, the furthest
132 |         // block find() can select on this level; key > padding is never true.
133 |         void adjust()
134 |         {
135 |             const size_t blocks = keys_.size() / array_size + 1;
136 |             keys_.resize( blocks * array_size, std::numeric_limits< value_type >::max() );
137 |         }
138 |     };
139 | 
140 |     ls::vector< tree_level > tree_;
141 |     const container_type& ref_;
142 | 
143 |     // Scalar lookup in the incomplete last block [base, size()), scanning from the back.
144 |     const_iterator find_in_tail( size_t base, const value_type& key ) const
145 |     {
146 |         size_t i = ref_.size();
147 |         while( i > base && !( ref_[ i - 1 ] == key ) ) --i;
148 |         if( i <= base ) return ref_.end();
149 |         auto it = ref_.begin();
150 |         std::advance( it, i - 1 );
151 |         return it;
152 |     }
153 | 
154 |     // Builds the levels above cont recursively; smaller (upper) levels are stored first.
155 |     void build_index( const container_type& cont )
156 |     {
157 |         if( cont.size() <= array_size ) return;
158 |         tree_level level;
159 |         for( size_t i = array_size-1; i < cont.size(); i += array_size )
160 |             level.keys_.push_back( cont[ i ] );
161 |         build_index( level.keys_ );
162 |         level.adjust();
163 |         tree_.emplace_back( std::move( level ) );
164 |     }
165 | };
166 | 
167 | void do_nothing( int32_t );
168 | 
169 | // Times Index_T: looks up every generated value `loop` times and returns the wall value of the timer.
170 | template< class Cont_T, template < typename... > class Index_T, typename TAG_T >
171 | uint64_t bench( const std::string& name, size_t size, size_t loop )
172 | {
173 |     using index_type = Index_T< Cont_T, TAG_T >;
174 | 
175 |     boost::timer::cpu_timer timer;
176 | 
177 |     // Fixed seed, so every call generates the same sequence of values.
178 |     Cont_T org;
179 |     srand( 1 );
180 |     std::generate_n( std::back_inserter(org), size, &rand );
181 | 
182 |     // The index is built over a sorted copy; the lookups below follow the unsorted order of org.
183 |     Cont_T sorted( org );
184 |     std::sort( sorted.begin(), sorted.end() );
185 |     index_type index( sorted );
186 |     index.build_index();
187 | 
188 |     // Only the lookups sit between start() and stop(); each hit is handed to do_nothing().
189 |     timer.start();
190 |     for( size_t pass = 0; pass != loop; ++pass )
191 |         for( auto value : org )
192 |             do_nothing( *index.find( value ) );
193 |     timer.stop();
194 | 
195 |     if( g_verbose )
196 |     {
197 |         std::cout << "Find all " << name << ": " << timer.format();
198 |     }
199 |     return timer.elapsed().wall;
200 | }
201 | 
202 | // Endless benchmark driver: with any command-line argument every round emits one CSV row,
203 | // otherwise it prints the timings plus the speed-up of each index over lower_bound.
204 | int main(int argc, char* /*argv*/[])
205 | {
206 |     using cont_t = ls::vector< int32_t >;
207 |     constexpr size_t runSize = 0x00400000;
208 |     constexpr size_t loop = 10;
209 | 
210 |     const bool csv_mode = argc > 1;
211 |     if( csv_mode )
212 |     {
213 |         g_verbose = false;
214 |         std::cout << "base,index_sse,index_avx" << std::endl;
215 |     }
216 |     else
217 |     {
218 |         std::cout << "\nsize: 0x" << std::hex << std::setw(8) << std::setfill( '0') << runSize << std::endl << std::endl;
219 |     }
220 | 
221 |     for( ;; ) // no exit condition: runs until the process is stopped from outside
222 |     {
223 |         const uint64_t base   = bench< cont_t, container_only, void >( "lower_bound .", runSize, loop );
224 |         const uint64_t index1 = bench< cont_t, nway_tree, ls::sse_tag >( "index SSE ...", runSize, loop );
225 | #ifdef LITESIMD_HAS_AVX
226 |         const uint64_t index2 = bench< cont_t, nway_tree, ls::avx_tag >( "index AVX ...", runSize, loop );
227 | #endif // LITESIMD_HAS_AVX
228 | 
229 |         if( !g_verbose )
230 |         {
231 |             // CSV row; the third column stays empty when AVX is not compiled in.
232 |             std::cout << base << "," << index1 << ",";
233 | #ifdef LITESIMD_HAS_AVX
234 |             std::cout << index2;
235 | #endif // LITESIMD_HAS_AVX
236 |             std::cout << std::endl;
237 |             continue;
238 |         }
239 | 
240 |         bench< cont_t, map_index, void >( "std::map ....", runSize, loop );
241 |         std::cout << std::endl << "Index Speed up SSE.......: " << std::fixed << std::setprecision(2)
242 |                   << static_cast<float>(base)/static_cast<float>(index1) << "x";
243 | #ifdef LITESIMD_HAS_AVX
244 |         std::cout << std::endl << "Index Speed up AVX.......: " << std::fixed << std::setprecision(2)
245 |                   << static_cast<float>(base)/static_cast<float>(index2) << "x";
246 | #endif // LITESIMD_HAS_AVX
247 |         std::cout << std::endl << std::endl;
248 |     }
249 |     return 0;
250 | }
251 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/common/arithmetic.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_ARCH_COMMON_ARITHMETIC_H
 24 | #define LITESIMD_ARCH_COMMON_ARITHMETIC_H
 25 | 
 26 | #include <litesimd/types.h>
 27 | 
 28 | namespace litesimd {
 29 | 
 30 | // Basic operations
 31 | // ---------------------------------------------------------------------------------------
 32 | /**
 33 |  * \ingroup arithmetic
 34 |  * \brief Adds each value inside the packed SIMD register.
 35 |  *
 36 |  * Each value inside the packed SIMD register is added independently and the result is
 37 |  * saved on the same index inside the packed SIMD register.
 38 |  *
 39 |  * | Index | 3 | 2 | 1 | 0 |
 40 |  * | :--- | :--: | :--: | :--: | :--: |
 41 |  * | Register X | a | b | c | d |
 42 |  * | Register Y | e | f | g | h |
 43 |  * | litesimd::add( X, Y ) | a + e | b + f | c + g | d + h |
 44 |  *
 45 |  * \param lhs, rhs SIMD registers to be added
 46 |  * \tparam ValueType_T Type of value inside packed SIMD register.
 47 |  *         Only for signed numerical types (int8_t, int16_t, int32_t, int64_t, float and double)
 48 |  * \tparam Tag_T Metaprogramming tag for instruction set selection.
 49 |  * \returns SIMD register with the sum of each packed value
 50 |  *
 51 |  * **Example**
 52 |  * ```{.cpp}
 53 |  * #include <iostream>
 54 |  * #include <litesimd/types.h>
 55 |  * #include <litesimd/algorithm.h>
 56 |  * #include <litesimd/helpers/iostream.h>
 57 |  *
 58 |  * int main()
 59 |  * {
 60 |  *     namespace ls = litesimd;
 61 |  *     t_int32_simd a( 10 ), b( 1 );
 62 |  *     std::cout << "add( a, b ): " << ls::add( a, b ) << std::endl;
 63 |  *     return 0;
 64 |  * }
 65 |  * ```
 66 |  * Output on a SSE compilation
 67 |  * ```
 68 |  * add( a, b ): ( 11, 11, 11, 11 )
 69 |  * ```
 70 |  *
 71 |  * \see sub
 72 |  */
 73 | template< typename ValueType_T, typename Tag_T = default_tag >
 74 | simd_type< ValueType_T, Tag_T >
 75 | add( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){}
 76 | 
 77 | /**
 78 |  * \ingroup arithmetic
 79 |  * \brief Subtract each value inside the packed SIMD register.
 80 |  *
 81 |  * Each value inside the packed SIMD register is subtracted independently and the result is
 82 |  * saved on the same index inside the packed SIMD register.
 83 |  *
 84 |  * | Index | 3 | 2 | 1 | 0 |
 85 |  * | :--- | :--: | :--: | :--: | :--: |
 86 |  * | Register X | a | b | c | d |
 87 |  * | Register Y | e | f | g | h |
 88 |  * | litesimd::sub( X, Y ) | a - e | b - f | c - g | d - h |
 89 |  *
 90 |  * \param lhs, rhs SIMD registers to be subtracted
 91 |  * \tparam ValueType_T Type of value inside packed SIMD register.
 92 |  *         Only for signed numerical types (int8_t, int16_t, int32_t, int64_t, float and double)
 93 |  * \tparam Tag_T Metaprogramming tag for instruction set selection.
 94 |  * \returns SIMD register with the difference of each packed value
 95 |  *
 96 |  * **Example**
 97 |  * ```{.cpp}
 98 |  * #include <iostream>
 99 |  * #include <litesimd/types.h>
100 |  * #include <litesimd/algorithm.h>
101 |  * #include <litesimd/helpers/iostream.h>
102 |  *
103 |  * int main()
104 |  * {
105 |  *     namespace ls = litesimd;
106 |  *     t_int32_simd a( 10 ), b( 1 );
107 |  *     std::cout << "sub( a, b ): " << ls::sub( a, b ) << std::endl;
108 |  *     return 0;
109 |  * }
110 |  * ```
111 |  * Output on a SSE compilation
112 |  * ```
113 |  * sub( a, b ): ( 9, 9, 9, 9 )
114 |  * ```
115 |  *
116 |  * \see add
117 |  */
118 | template< typename ValueType_T, typename Tag_T = default_tag >
119 | simd_type< ValueType_T, Tag_T >
120 | sub( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){}
121 | 
122 | /**
123 |  * \ingroup arithmetic
124 |  * \brief Multiply each value inside the packed SIMD register and save the least significant bits.
125 |  *
126 |  * Each value inside the packed SIMD register is multiplied independently and the
127 |  * least significant bits of the result are saved on the same index inside the packed SIMD register.
128 |  *
129 |  * For 16 bits
130 |  * | Index | 3 | 2 | 1 | 0 |
131 |  * | :--- | :--: | :--: | :--: | :--: |
132 |  * | Register X | a | b | c | d |
133 |  * | Register Y | e | f | g | h |
134 |  * | litesimd::mullo( X, Y ) | (a * e) & 0xffff | (b * f) & 0xffff | (c * g) & 0xffff | (d * h) & 0xffff |
135 |  *
136 |  * \param lhs, rhs SIMD registers to be multiplied
137 |  * \tparam ValueType_T Type of value inside packed SIMD register. Only int16_t and int32_t are available for this function.
138 |  * \tparam Tag_T Metaprogramming tag for instruction set selection.
139 |  * \returns SIMD register with the least significant bits of the multiplication result
140 |  *
141 |  * **Example**
142 |  * ```{.cpp}
143 |  * #include <iostream>
144 |  * #include <litesimd/types.h>
145 |  * #include <litesimd/algorithm.h>
146 |  * #include <litesimd/helpers/iostream.h>
147 |  *
148 |  * int main()
149 |  * {
150 |  *     namespace ls = litesimd;
151 |  *     t_int16_simd a( 0x1002 ), b( 0x10 );
152 |  *     std::cout << "mullo( a, b ): " << ls::mullo( a, b ) << std::endl;
153 |  *     return 0;
154 |  * }
155 |  * ```
156 |  * Output on a SSE compilation
157 |  * ```
158 |  * mullo( a, b ): (32, 32, 32, 32, 32, 32, 32, 32)
159 |  * ```
160 |  *
161 |  * \remarks This function works only on int16_t and int32_t on Intel architecture
162 |  * \see mulhi
163 |  */
164 | template< typename ValueType_T, typename Tag_T = default_tag >
165 | simd_type< ValueType_T, Tag_T >
166 | mullo( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){}
167 | 
168 | /**
169 |  * \ingroup arithmetic
170 |  * \brief Multiply each value inside the packed SIMD register and save the most significant bits.
171 |  *
172 |  * Each value inside the packed SIMD register is multiplied independently and the
173 |  * most significant bits of the result are saved on the same index inside the packed SIMD register.
174 |  *
175 |  * For 16 bits
176 |  * | Index | 3 | 2 | 1 | 0 |
177 |  * | :--- | :--: | :--: | :--: | :--: |
178 |  * | Register X | a | b | c | d |
179 |  * | Register Y | e | f | g | h |
180 |  * | litesimd::mulhi( X, Y ) | (a * e) >> 16 | (b * f) >> 16 | (c * g) >> 16 | (d * h) >> 16 |
181 |  *
182 |  * \param lhs, rhs SIMD registers to be multiplied
183 |  * \tparam ValueType_T Type of value inside packed SIMD register. Only int16_t is available for this function.
184 |  * \tparam Tag_T Metaprogramming tag for instruction set selection.
185 |  * \returns SIMD register with the most significant bits of the multiplication result
186 |  *
187 |  * **Example**
188 |  * ```{.cpp}
189 |  * #include <iostream>
190 |  * #include <litesimd/types.h>
191 |  * #include <litesimd/algorithm.h>
192 |  * #include <litesimd/helpers/iostream.h>
193 |  *
194 |  * int main()
195 |  * {
196 |  *     namespace ls = litesimd;
197 |  *     t_int16_simd a( 0x1002 ), b( 0x10 );
198 |  *     std::cout << "mulhi( a, b ): " << ls::mulhi( a, b ) << std::endl;
199 |  *     return 0;
200 |  * }
201 |  * ```
202 |  * Output on a SSE compilation (0x1002 * 0x10 = 0x10020, and 0x10020 >> 16 = 1)
203 |  * ```
204 |  * mulhi( a, b ): (1, 1, 1, 1, 1, 1, 1, 1)
205 |  * ```
206 |  *
207 |  * \remarks This function works only on int16_t on Intel architecture
208 |  * \see mullo
209 |  */
210 | template< typename ValueType_T, typename Tag_T = default_tag >
211 | simd_type< ValueType_T, Tag_T >
212 | mulhi( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){}
213 | 
214 | template< typename ValueType_T, typename Tag_T = default_tag > // NOTE(review): undocumented; by analogy with add/sub presumably per-value division -- confirm
215 | simd_type< ValueType_T, Tag_T >
216 | div( simd_type< ValueType_T, Tag_T > lhs, simd_type< ValueType_T, Tag_T > rhs ){} // this generic version has an empty body (no return statement)
217 | 
218 | } // namespace litesimd
219 | 
220 | #endif // LITESIMD_ARCH_COMMON_ARITHMETIC_H
221 | 


--------------------------------------------------------------------------------
/include/litesimd/detail/arch/sse/shuffle.h:
--------------------------------------------------------------------------------
  1 | // MIT License
  2 | //
  3 | // Copyright (c) 2018 André Tupinambá
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | #ifndef LITESIMD_SSE_SHUFFLE_H
 24 | #define LITESIMD_SSE_SHUFFLE_H
 25 | 
 26 | #ifdef LITESIMD_HAS_SSE
 27 | 
 28 | #include <smmintrin.h>
 29 | #include <litesimd/types.h>
 30 | #include <litesimd/detail/arch/common/shuffle.h>
 31 | 
 32 | namespace litesimd {
 33 | 
 34 | // Blend
 35 | // ---------------------------------------------------------------------------------------
 36 | // blend(): mask decides between trueVal and falseVal; the intrinsic receives falseVal first.
 37 | #define DEF_BLEND( ELEM_T, INTRINSIC ) \
 38 | template<> inline simd_type< ELEM_T, sse_tag > \
 39 | blend< ELEM_T, sse_tag >( simd_type< ELEM_T, sse_tag > mask, \
 40 |                           simd_type< ELEM_T, sse_tag > trueVal, \
 41 |                           simd_type< ELEM_T, sse_tag > falseVal ) \
 42 | { return INTRINSIC( falseVal, trueVal, mask ); }
 43 | DEF_BLEND( int8_t,  _mm_blendv_epi8 )
 44 | DEF_BLEND( int16_t, _mm_blendv_epi8 )
 45 | DEF_BLEND( int32_t, _mm_blendv_epi8 )
 46 | DEF_BLEND( int64_t, _mm_blendv_epi8 )
 47 | DEF_BLEND( float,   _mm_blendv_ps )
 48 | DEF_BLEND( double,  _mm_blendv_pd )
 49 | #undef DEF_BLEND
 50 | 
 51 | // Get
 52 | // ---------------------------------------------------------------------------------------
 53 | // int8_t lane read. `index` is a template argument because the extract intrinsic needs
 54 | // an immediate; the int it returns is narrowed to the lane type.
 55 | template< int index >
 56 | struct get_functor< index, int8_t, sse_tag >
 57 | {
 58 |     inline int8_t operator()( simd_type< int8_t, sse_tag > vec )
 59 |     { return static_cast< int8_t >( _mm_extract_epi8( vec, index ) ); }
 60 | };
 61 | 
 62 | // int16_t lane read, same scheme as the int8_t version: the int result of the
 63 | // extract intrinsic is narrowed to int16_t.
 64 | template< int index >
 65 | struct get_functor< index, int16_t, sse_tag >
 66 | {
 67 |     inline int16_t operator()( simd_type< int16_t, sse_tag > vec )
 68 |     { return static_cast< int16_t >( _mm_extract_epi16( vec, index ) ); }
 69 | };
 70 | 
 71 | // int32_t lane read through _mm_extract_epi32; `index` is forwarded as the
 72 | // immediate operand of the intrinsic.
 73 | template< int index >
 74 | struct get_functor< index, int32_t, sse_tag >
 75 | {
 76 |     inline int32_t operator()( simd_type< int32_t, sse_tag > vec )
 77 |     { return _mm_extract_epi32( vec, index ); }
 78 | };
 79 | 
 80 | // int64_t lane read through _mm_extract_epi64.
 81 | // NOTE(review): this intrinsic is presumably limited to 64-bit targets -- confirm.
 82 | template< int index >
 83 | struct get_functor< index, int64_t, sse_tag >
 84 | {
 85 |     inline int64_t operator()( simd_type< int64_t, sse_tag > vec )
 86 |     { return _mm_extract_epi64( vec, index ); }
 87 | };
 88 | 
 89 | // float lane read: used as shuffle immediate, `index` routes the wanted lane to
 90 | // lane 0, whose value _mm_cvtss_f32 returns.
 91 | template< int index >
 92 | struct get_functor< index, float, sse_tag >
 93 | {
 94 |     inline float operator()( simd_type< float, sse_tag > vec )
 95 |     { return _mm_cvtss_f32( _mm_shuffle_ps( vec, vec, index ) ); }
 96 | };
 97 | 
 98 | // double lane read: used as shuffle immediate, `index` routes the wanted lane to
 99 | // lane 0, whose value _mm_cvtsd_f64 returns.
100 | template< int index >
101 | struct get_functor< index, double, sse_tag >
102 | {
103 |     inline double operator()( simd_type< double, sse_tag > vec )
104 |     { return _mm_cvtsd_f64( _mm_shuffle_pd( vec, vec, index ) ); }
105 | };
106 | 
107 | // Set
108 | // ---------------------------------------------------------------------------------------
109 | // Returns a copy of vec whose int8_t lane `index` (a compile-time constant) holds val.
110 | template< int index >
111 | struct set_functor< index, int8_t, sse_tag >
112 | {
113 |     inline simd_type< int8_t, sse_tag > operator()( simd_type< int8_t, sse_tag > vec, int8_t val )
114 |     {
115 |         return _mm_insert_epi8( vec, val, index );
116 |     }
117 | };
118 | 
119 | // Returns a copy of vec whose int16_t lane `index` (a compile-time constant) holds val.
120 | template< int index >
121 | struct set_functor< index, int16_t, sse_tag >
122 | {
123 |     inline simd_type< int16_t, sse_tag > operator()( simd_type< int16_t, sse_tag > vec, int16_t val )
124 |     {
125 |         return _mm_insert_epi16( vec, val, index );
126 |     }
127 | };
128 | 
129 | // Returns a copy of vec whose int32_t lane `index` (a compile-time constant) holds val.
130 | template< int index >
131 | struct set_functor< index, int32_t, sse_tag >
132 | {
133 |     inline simd_type< int32_t, sse_tag > operator()( simd_type< int32_t, sse_tag > vec, int32_t val )
134 |     {
135 |         return _mm_insert_epi32( vec, val, index );
136 |     }
137 | };
138 | 
139 | // Returns a copy of vec whose int64_t lane `index` (a compile-time constant) holds val.
140 | template< int index >
141 | struct set_functor< index, int64_t, sse_tag >
142 | {
143 |     inline simd_type< int64_t, sse_tag > operator()( simd_type< int64_t, sse_tag > vec, int64_t val )
144 |     {
145 |         return _mm_insert_epi64( vec, val, index );
146 |     }
147 | };
148 | 
149 | // float lane write: val is broadcast, then inserted into lane `index` (destination bits of the immediate).
150 | template< int index >
151 | struct set_functor< index, float, sse_tag >
152 | {
153 |     inline simd_type< float, sse_tag > operator()( simd_type< float, sse_tag > vec, float val )
154 |     {
155 |         return _mm_insert_ps( vec, _mm_set1_ps( val ), _MM_SHUFFLE( 0, index, 0, 0 ) );
156 |     }
157 | };
158 | 
159 | // double, lane 0: the result takes lane 0 from the broadcast val and keeps lane 1 of vec.
160 | template<>
161 | struct set_functor< 0, double, sse_tag >
162 | {
163 |     inline simd_type< double, sse_tag > operator()( simd_type< double, sse_tag > vec, double val )
164 |     {
165 |         return _mm_shuffle_pd( _mm_set1_pd( val ), vec, 2 );
166 |     }
167 | };
168 | 
169 | // double, lane 1: the result keeps lane 0 of vec and takes lane 1 from the broadcast val.
170 | template<>
171 | struct set_functor< 1, double, sse_tag >
172 | {
173 |     inline simd_type< double, sse_tag > operator()( simd_type< double, sse_tag > vec, double val )
174 |     {
175 |         return _mm_shuffle_pd( vec, _mm_set1_pd( val ), 0 );
176 |     }
177 | };
178 | 
179 | // High insert
180 | // ---------------------------------------------------------------------------------------
181 | // int8_t: the one-byte shift moves every lane one position down (lane 0 is dropped);
182 | // val is then stored in the top lane, 15.
183 | template<> inline simd_type< int8_t, sse_tag >
184 | high_insert< int8_t, sse_tag >( simd_type< int8_t, sse_tag > vec, int8_t val )
185 | { return set_functor<15, int8_t, sse_tag>()( _mm_srli_si128( vec, 1 ), val ); }
186 | 
187 | // int16_t: the two-byte shift moves every lane one position down (lane 0 is dropped);
188 | // val is then stored in the top lane, 7.
189 | template<> inline simd_type< int16_t, sse_tag >
190 | high_insert< int16_t, sse_tag >( simd_type< int16_t, sse_tag > vec, int16_t val )
191 | { return set_functor<7, int16_t, sse_tag>()( _mm_srli_si128( vec, 2 ), val ); }
192 | 
193 | // int32_t: the shuffle copies lanes 1, 2, 3 into lanes 0, 1, 2; lane 3 is then
194 | // overwritten with val.
195 | template<> inline simd_type< int32_t, sse_tag >
196 | high_insert< int32_t, sse_tag >( simd_type< int32_t, sse_tag > vec, int32_t val )
197 | { return set_functor<3, int32_t, sse_tag>()( _mm_shuffle_epi32( vec, _MM_SHUFFLE( 3, 3, 2, 1 ) ), val ); }
198 | 
199 | // int64_t: the 32-bit shuffle copies the upper 64-bit lane into the lower one; the
200 | // upper lane is then overwritten with val.
201 | template<> inline simd_type< int64_t, sse_tag >
202 | high_insert< int64_t, sse_tag >( simd_type< int64_t, sse_tag > vec, int64_t val )
203 | { return set_functor<1, int64_t, sse_tag>()( _mm_shuffle_epi32( vec, _MM_SHUFFLE( 3, 3, 3, 2 ) ), val ); }
204 | 
205 | // float: the shuffle copies lanes 1, 2, 3 into lanes 0, 1, 2; lane 3 is then overwritten with val.
206 | template<> inline simd_type< float, sse_tag >
207 | high_insert< float, sse_tag >( simd_type< float, sse_tag > vec, float val )
208 | {
209 |     return set_functor<3, float, sse_tag>()( _mm_shuffle_ps( vec, vec, _MM_SHUFFLE( 3, 3, 2, 1 ) ), val );
210 | }
211 | 
212 | // double: the shuffle copies lane 1 into lane 0; lane 1 is then overwritten with val.
213 | template<> inline simd_type< double, sse_tag >
214 | high_insert< double, sse_tag >( simd_type< double, sse_tag > vec, double val )
215 | {
216 |     return set_functor<1, double, sse_tag>()( _mm_shuffle_pd( vec, vec, 1 ), val );
217 | }
218 | 
219 | // Low insert
220 | // ---------------------------------------------------------------------------------------
221 | // int8_t: the one-byte shift moves every lane one position up (lane 15 is dropped);
222 | // val is then stored in lane 0.
223 | template<> inline simd_type< int8_t, sse_tag >
224 | low_insert< int8_t, sse_tag >( simd_type< int8_t, sse_tag > vec, int8_t val )
225 | { return set_functor<0, int8_t, sse_tag>()( _mm_slli_si128( vec, 1 ), val ); }
226 | 
227 | // int16_t: the two-byte shift moves every lane one position up (lane 7 is dropped);
228 | // val is then stored in lane 0.
229 | template<> inline simd_type< int16_t, sse_tag >
230 | low_insert< int16_t, sse_tag >( simd_type< int16_t, sse_tag > vec, int16_t val )
231 | { return set_functor<0, int16_t, sse_tag>()( _mm_slli_si128( vec, 2 ), val ); }
232 | 
233 | // int32_t: the shuffle copies lanes 0, 1, 2 into lanes 1, 2, 3; lane 0 is then
234 | // overwritten with val.
235 | template<> inline simd_type< int32_t, sse_tag >
236 | low_insert< int32_t, sse_tag >( simd_type< int32_t, sse_tag > vec, int32_t val )
237 | { return set_functor<0, int32_t, sse_tag>()( _mm_shuffle_epi32( vec, _MM_SHUFFLE( 2, 1, 0, 0 ) ), val ); }
238 | 
239 | // int64_t: the 32-bit shuffle copies the lower 64-bit lane into the upper one; the
240 | // lower lane is then overwritten with val.
241 | template<> inline simd_type< int64_t, sse_tag >
242 | low_insert< int64_t, sse_tag >( simd_type< int64_t, sse_tag > vec, int64_t val )
243 | { return set_functor<0, int64_t, sse_tag>()( _mm_shuffle_epi32( vec, _MM_SHUFFLE( 1, 0, 0, 0 ) ), val ); }
244 | 
245 | // float: the shuffle copies lanes 0, 1, 2 into lanes 1, 2, 3; lane 0 is then overwritten with val.
246 | template<> inline simd_type< float, sse_tag >
247 | low_insert< float, sse_tag >( simd_type< float, sse_tag > vec, float val )
248 | {
249 |     return set_functor<0, float, sse_tag>()( _mm_shuffle_ps( vec, vec, _MM_SHUFFLE( 2, 1, 0, 0 ) ), val );
250 | }
251 | 
252 | // double: the shuffle copies lane 0 into lane 1; lane 0 is then overwritten with val.
253 | template<> inline simd_type< double, sse_tag >
254 | low_insert< double, sse_tag >( simd_type< double, sse_tag > vec, double val )
255 | {
256 |     return set_functor<0, double, sse_tag>()( _mm_shuffle_pd( vec, vec, 0 ), val );
257 | }
258 | 
259 | } // namespace litesimd
260 | 
261 | #endif // LITESIMD_HAS_SSE
262 | #endif // LITESIMD_SSE_SHUFFLE_H
263 | 


--------------------------------------------------------------------------------