├── .github └── workflows │ └── ccpp.yml ├── .gitignore ├── .gitlab-ci.yml ├── CMakeLists.txt ├── FindMIPP.cmake ├── LICENSE ├── README.md ├── TODO.md ├── ci ├── analysis-cppcheck.sh ├── build-coverage-linux-armv7-gcc.sh ├── build-coverage-linux-armv8-gcc.sh ├── build-coverage-linux-x86-gcc.sh ├── build-linux-armv7-clang.sh ├── build-linux-armv7-gcc.sh ├── build-linux-armv8-clang.sh ├── build-linux-armv8-gcc.sh ├── build-linux-x86-clang.sh ├── build-linux-x86-gcc.sh ├── build-linux-x86-icpc.sh ├── build-macos-x86-clang.sh ├── build-windows-x86-gcc.bat ├── build-windows-x86-msvc.bat ├── coverage-linux.sh ├── test-linux-intel-sde-unit.sh ├── test-linux-macos-unit.sh ├── test-windows-unit.bat └── tools │ ├── build-linux-x86-gcc-generic.sh │ ├── threads.bat │ └── threads.sh ├── cmake ├── MIPPConfig.cmake.in └── cmake_uninstall.cmake.in ├── codegen ├── gen_compress.py ├── mipp_LUT.cpp.j2 ├── requirements.txt └── vcompress-LUT.cpp.j2 ├── examples ├── conversion.cpp ├── gemm.cpp ├── initreg.cpp ├── mask.cpp ├── mathfun.cpp ├── operator.cpp ├── reduction.cpp ├── sort │ ├── LC_sorter.hpp │ ├── LC_sorter_simd.hpp │ └── Quick_sorter.hpp └── sorting.cpp ├── include ├── math │ ├── avx512_mathfun.h │ ├── avx512_mathfun.hxx │ ├── avx_mathfun.h │ ├── avx_mathfun.hxx │ ├── neon_mathfun.h │ ├── neon_mathfun.hxx │ ├── sse_mathfun.h │ └── sse_mathfun.hxx ├── mipp.h ├── mipp_impl_AVX.hxx ├── mipp_impl_AVX512.hxx ├── mipp_impl_NEON.hxx ├── mipp_impl_SSE.hxx ├── mipp_impl_SVE.hxx ├── mipp_object.hxx ├── mipp_scalar_op.h └── mipp_scalar_op.hxx ├── mipp.jpg └── tests ├── lib └── Catch2 │ ├── README.md │ └── include │ └── catch.hpp └── src ├── arithmetic_operations ├── abs.cpp ├── add.cpp ├── cdiv.cpp ├── cmul.cpp ├── cmulconj.cpp ├── conj.cpp ├── cvt.cpp ├── div.cpp ├── div2.cpp ├── div4.cpp ├── fmadd.cpp ├── fmsub.cpp ├── fnmadd.cpp ├── fnmsub.cpp ├── max.cpp ├── min.cpp ├── mul.cpp ├── neg.cpp ├── norm.cpp ├── pack.cpp ├── round.cpp ├── rsqrt.cpp ├── sat.cpp ├── sign.cpp ├── sqrt.cpp ├── 
sub.cpp └── trunc.cpp ├── bitwise_operations ├── andb.cpp ├── andnb.cpp ├── lshift.cpp ├── lshiftr.cpp ├── notb.cpp ├── orb.cpp ├── rshift.cpp ├── rshiftr.cpp └── xorb.cpp ├── logical_comparisons ├── cmpeq.cpp ├── cmpge.cpp ├── cmpgt.cpp ├── cmple.cpp ├── cmplt.cpp └── cmpneq.cpp ├── main.cpp ├── math_functions ├── cos.cpp ├── exp.cpp ├── log.cpp ├── sin.cpp └── sincos.cpp ├── memory_operations ├── blend.cpp ├── combine.cpp ├── compress.cpp ├── deinterleave.cpp ├── gather.cpp ├── get.cpp ├── getfirst.cpp ├── high.cpp ├── interleave.cpp ├── interleavehi.cpp ├── interleavelo.cpp ├── load.cpp ├── low.cpp ├── lrot.cpp ├── rrot.cpp ├── scatter.cpp ├── set.cpp ├── shuff.cpp └── store.cpp ├── reductions ├── hadd.cpp ├── hmax.cpp ├── hmin.cpp ├── hmul.cpp └── testz.cpp ├── static_for.hpp └── static_for.hxx /.gitignore: -------------------------------------------------------------------------------- 1 | .gitignore 2 | .cproject 3 | .project 4 | cppcheck/ 5 | examples/a.out 6 | htdocs/ 7 | .settings/ 8 | src_files.txt 9 | tests/build* 10 | tests/code_coverage_files/ 11 | tests/code_coverage_report/ 12 | src/gen/* 13 | 14 | build* 15 | -------------------------------------------------------------------------------- /FindMIPP.cmake: -------------------------------------------------------------------------------- 1 | 2 | if(MIPP_INCLUDE_DIRS) 3 | set(MIPP_FIND_QUIETLY TRUE) 4 | endif() 5 | 6 | find_path(MIPP_INCLUDE_DIRS NAMES mipp.h 7 | HINTS 8 | ${MIPP_ROOT} 9 | $ENV{HOME}/.local 10 | PATH_SUFFIXES include/mipp 11 | ) 12 | 13 | include(FindPackageHandleStandardArgs) 14 | find_package_handle_standard_args(MIPP DEFAULT_MSG MIPP_INCLUDE_DIRS) 15 | 16 | if(MIPP_FOUND AND NOT TARGET MIPP) 17 | message(STATUS "MIPP_FOUND -> creating interface library MIPP at ${MIPP_INCLUDE_DIRS}") 18 | add_library(MIPP INTERFACE) 19 | target_compile_definitions(MIPP INTERFACE HAVE_MIPP=1) 20 | target_include_directories(MIPP INTERFACE ${MIPP_INCLUDE_DIRS}) 21 | target_compile_features(MIPP 
INTERFACE cxx_std_11) 22 | elseif(NOT MIPP_FOUND) # warn only on a real miss; a repeated find_package(MIPP) with the MIPP target already defined is not a failure 23 | message(WARNING "MIPP not found.") 24 | endif() 25 | 26 | mark_as_advanced(MIPP_INCLUDE_DIRS) 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017-2025 MIPP 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /TODO.md: -------------------------------------------------------------------------------- 1 | # TODO 2 | 3 | - [ ] Add left operator functions (ex: "5 + mipp::Reg" should work then!) 
4 | - [ ] Fix and improve GitHub actions 5 | - [ ] Create a docker image with "Intel Software Development Emulator" to enable 6 | AVX-512 instructions emulation on the runners that do not support 7 | native AVX-512 8 | - [ ] Find a workaround for the 16-bit SSE `compress` that requires BMI2 9 | extension (remove `_pext_u32` dependency, available since Haswell) 10 | - [x] Add SVE to the code coverage 11 | - [x] Compile the examples in the `CMakeLists.txt` 12 | - [x] Add `compress` for NEONv1 with emulation of `vqtbl1q` based on two `vtbl2` 13 | - [x] Improve NEONv2 `shuff` operations with `vqtbl1q` instruction -------------------------------------------------------------------------------- /ci/analysis-cppcheck.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | cppcheck --version 5 | 6 | mkdir cppcheck 7 | #cppcheck --suppress=missingIncludeSystem -I./src/ --force --enable=all --std=c++11 -U_MSC_VER ./src/ 2> cppcheck_all.log 8 | find .\/src\/ -type f -follow -print | grep "[.]h$\|[.]hpp$\|[.]hxx$\|[.]cpp$" > src_files.txt 9 | cppcheck --language=c++ --suppress=missingIncludeSystem --force --enable=all --std=c++11 -U_MSC_VER --file-list=src_files.txt 2> cppcheck/cppcheck_all.log 10 | cat cppcheck/cppcheck_all.log | grep "error:" > cppcheck/cppcheck_error.log 11 | cat cppcheck/cppcheck_all.log | grep "warning:" > cppcheck/cppcheck_warning.log 12 | cat cppcheck/cppcheck_all.log | grep "performance:" > cppcheck/cppcheck_performance.log 13 | cat cppcheck/cppcheck_all.log | grep "style:" > cppcheck/cppcheck_style.log 14 | cat cppcheck/cppcheck_all.log | grep "portability:" > cppcheck/cppcheck_portability.log 15 | cat cppcheck/cppcheck_all.log | grep "information:" > cppcheck/cppcheck_information.log 16 | cat cppcheck/cppcheck_all.log | grep "unusedFunction:" > cppcheck/cppcheck_unusedFunction.log 17 | cat cppcheck/cppcheck_all.log | grep "missingInclude:" > cppcheck/cppcheck_missingInclude.log 18 | 19 
| COUNT=$(wc -l < cppcheck/cppcheck_error.log ) 20 | 21 | if [ $COUNT -gt 1 ]; then 22 | echo "Error count is $COUNT! cppcheck run failed :-(."; 23 | echo "" 24 | echo "Errors list:" 25 | cat cppcheck/cppcheck_error.log 26 | exit 1; 27 | elif [ $COUNT -gt 0 ]; then 28 | echo "There is one false positive error."; 29 | echo "" 30 | echo "Errors list:" 31 | cat cppcheck/cppcheck_error.log 32 | else 33 | echo "There is no error :-)." 34 | fi 35 | 36 | exit 0; -------------------------------------------------------------------------------- /ci/build-coverage-linux-armv7-gcc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | WD=$(pwd) 5 | 6 | function gen_coverage_info { 7 | build=$1 8 | mkdir $build 9 | cd $build 10 | cmake ../.. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS_DEBUG="-g -O0" -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions --coverage $2" -DCMAKE_EXE_LINKER_FLAGS="--coverage" -DMIPP_STATIC_LIB=ON 11 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 12 | make -j $THREADS 13 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 14 | ./bin/run-tests 15 | cd .. 
16 | # rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 17 | lcov --capture --directory $build/CMakeFiles/tests_exe.dir/tests/src/ --output-file code_coverage_files/$build.info 18 | # rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 19 | lcov --remove code_coverage_files/$build.info "*/usr*" "*lib/*" "*/tests/src*" --output-file code_coverage_files/$build.info 20 | # rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 21 | if [[ -s code_coverage_files/$build.info ]] 22 | then 23 | sed -i -e "s#${WD}#\.\.#g" code_coverage_files/$build.info 24 | else 25 | rm code_coverage_files/$build.info 26 | fi 27 | } 28 | 29 | python3 codegen/gen_compress.py 30 | 31 | cd tests 32 | mkdir code_coverage_files || true 33 | 34 | build_root=build_coverage_linux_armv7_gcc 35 | gen_coverage_info "${build_root}_nointr" "-DMIPP_NO_INTRINSICS" 36 | gen_coverage_info "${build_root}_neon" "-march=armv7-a -mfpu=neon" 37 | gen_coverage_info "${build_root}_neon_fma" "-march=armv7-a -mfpu=neon-vfpv4" 38 | -------------------------------------------------------------------------------- /ci/build-coverage-linux-armv8-gcc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | WD=$(pwd) 5 | 6 | function gen_coverage_info { 7 | build=$1 8 | mkdir $build 9 | cd $build 10 | cmake ../.. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS_DEBUG="-g -O0" -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions --coverage $2" -DCMAKE_EXE_LINKER_FLAGS="--coverage" -DMIPP_STATIC_LIB=ON 11 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 12 | make -j $THREADS 13 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 14 | if [[ $3 == native ]]; then 15 | # execute the tests natively 16 | ./bin/run-tests 17 | else 18 | # use Arm Instruction Emulator (ArmIE) to execute the tests 19 | source /usr/share/modules/init/profile.sh 20 | module load armie22/22.0 21 | nbits=$(echo $build | grep -Eo '[0-9]+(\.[0-9]+)?' 
| tail -n 1) 22 | if [ -z "$nbits" ] 23 | then 24 | echo "The build name is incompatible with SVE build, it should contain the SIMD size (current wrong build name is '$build', an example of expected build name is: 'build_coverage_linux_armv8_gcc_sve_ls256')." 25 | exit 1 26 | fi 27 | armie -msve-vector-bits=$nbits -- ./bin/run-tests 28 | #qemu-aarch64 -cpu max,sve-default-vector-length=$nbits ./bin/run-tests 29 | fi 30 | cd .. 31 | # rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 32 | lcov --capture --directory $build/CMakeFiles/tests_exe.dir/tests/src/ --output-file code_coverage_files/$build.info 33 | # rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 34 | lcov --remove code_coverage_files/$build.info "*/usr*" "*lib/*" "*/tests/src*" --output-file code_coverage_files/$build.info 35 | # rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 36 | if [[ -s code_coverage_files/$build.info ]] 37 | then 38 | sed -i -e "s#${WD}#\.\.#g" code_coverage_files/$build.info 39 | else 40 | rm code_coverage_files/$build.info 41 | fi 42 | } 43 | 44 | python3 codegen/gen_compress.py 45 | 46 | cd tests 47 | mkdir code_coverage_files || true 48 | 49 | build_root=build_coverage_linux_armv8_gcc 50 | gen_coverage_info "${build_root}_nointr" "-DMIPP_NO_INTRINSICS" "native" 51 | gen_coverage_info "${build_root}_neon" "-march=armv8.1-a+simd" "native" 52 | gen_coverage_info "${build_root}_sve_ls128" "-march=armv8-a+sve -msve-vector-bits=128" "armie" 53 | gen_coverage_info "${build_root}_sve_ls256" "-march=armv8-a+sve -msve-vector-bits=256" "armie" 54 | gen_coverage_info "${build_root}_sve_ls512" "-march=armv8-a+sve -msve-vector-bits=512" "armie" 55 | gen_coverage_info "${build_root}_sve_ls1024" "-march=armv8-a+sve -msve-vector-bits=1024" "armie" 56 | -------------------------------------------------------------------------------- /ci/build-coverage-linux-x86-gcc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | WD=$(pwd) 5 | 6 | # install 
Intel SDE emulator -------------------------------------------------- 7 | apt update 8 | apt install -y wget xz-utils 9 | echo 0 > /proc/sys/kernel/yama/ptrace_scope 10 | mkdir softwares 11 | cd softwares 12 | wget https://largo.lip6.fr/monolithe/downloads/sde-external-9.33.0-2024-01-07-lin.tar.xz 13 | tar -xvvf sde-external-9.33.0-2024-01-07-lin.tar.xz 14 | ln -s $WD/softwares/sde-external-9.33.0-2024-01-07-lin $WD/softwares/sde 15 | export PATH=$WD/softwares/sde:$PATH 16 | cd .. 17 | # ----------------------------------------------------------------------------- 18 | 19 | function gen_coverage_info { 20 | build=$1 21 | mkdir $build 22 | cd $build 23 | cmake ../.. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS_DEBUG="-g -O0" -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions --coverage $2" -DCMAKE_EXE_LINKER_FLAGS="--coverage" -DMIPP_STATIC_LIB=ON 24 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 25 | make -j $THREADS 26 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 27 | if [[ $3 == native ]]; then 28 | # execute the tests natively 29 | ./bin/run-tests 30 | else 31 | # use the Intel SDE emulator to execute the tests 32 | sde64 $3 -- ./bin/run-tests 33 | fi 34 | cd .. 
35 | # rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 36 | lcov --capture --directory $build/CMakeFiles/tests_exe.dir/tests/src/ --output-file code_coverage_files/$build.info 37 | # rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 38 | lcov --remove code_coverage_files/$build.info "*/usr*" "*lib/*" "*/tests/src*" --output-file code_coverage_files/$build.info 39 | # rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 40 | if [[ -s code_coverage_files/$build.info ]] 41 | then 42 | sed -i -e "s#${WD}#\.\.#g" code_coverage_files/$build.info 43 | else 44 | rm code_coverage_files/$build.info 45 | fi 46 | } 47 | 48 | python3 codegen/gen_compress.py 49 | 50 | cd tests 51 | mkdir code_coverage_files || true 52 | 53 | build_root=build_coverage_linux_x86_gcc 54 | gen_coverage_info "${build_root}_nointr" "-DMIPP_NO_INTRINSICS" "native" 55 | gen_coverage_info "${build_root}_sse2" "-msse2" "native" 56 | gen_coverage_info "${build_root}_sse3" "-msse3" "native" 57 | gen_coverage_info "${build_root}_ssse3" "-mssse3" "native" 58 | gen_coverage_info "${build_root}_sse4_1" "-msse4.1" "native" 59 | gen_coverage_info "${build_root}_sse4_2" "-msse4.2" "native" 60 | gen_coverage_info "${build_root}_avx" "-mavx" "native" 61 | gen_coverage_info "${build_root}_avx2" "-mavx2" "native" 62 | gen_coverage_info "${build_root}_avx2_bmi2" "-mavx2 -mbmi2" "native" 63 | gen_coverage_info "${build_root}_avx2_bmi2_fma" "-mavx2 -mbmi2 -mfma" "native" 64 | gen_coverage_info "${build_root}_avx512f" "-mavx512f" "-skx" # skylake server architecture emulation 65 | gen_coverage_info "${build_root}_avx512bw" "-mavx512f -mavx512bw" "-skx" # skylake server architecture emulation 66 | gen_coverage_info "${build_root}_avx512vbmi2" "-mavx512f -mavx512bw -mavx512vbmi -mavx512vbmi2" "-spr" # sapphire rapid architecture emulation 67 | -------------------------------------------------------------------------------- /ci/build-linux-armv7-clang.sh: -------------------------------------------------------------------------------- 1 | 
#!/bin/bash 2 | set -x 3 | 4 | function compile { 5 | build=$1 6 | mkdir $build 7 | cd $build 8 | cmake .. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-Wall -funroll-loops $2" -DMIPP_STATIC_LIB=ON 9 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 10 | make -j $THREADS 11 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 12 | cd .. 13 | } 14 | 15 | apt update 16 | apt install -y python3-pip 17 | pip3 install --user -r codegen/requirements.txt 18 | python3 codegen/gen_compress.py 19 | 20 | build_root=build_linux_armv7_clang 21 | compile "${build_root}_nointr" "-DMIPP_NO_INTRINSICS" 22 | compile "${build_root}_neon" "-march=armv7-a -mfpu=neon" 23 | compile "${build_root}_neon_fma" "-march=armv7-a -mfpu=neon-vfpv4" 24 | -------------------------------------------------------------------------------- /ci/build-linux-armv7-gcc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | function compile { 5 | build=$1 6 | mkdir $build 7 | cd $build 8 | cmake .. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions $2" -DMIPP_STATIC_LIB=ON 9 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 10 | make -j $THREADS 11 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 12 | cd .. 
13 | } 14 | 15 | apt update 16 | apt install -y python3-pip 17 | pip3 install --user -r codegen/requirements.txt 18 | python3 codegen/gen_compress.py 19 | 20 | build_root=build_linux_armv7_gcc 21 | compile "${build_root}_nointr" "-DMIPP_NO_INTRINSICS" 22 | compile "${build_root}_neon" "-march=armv7-a -mfpu=neon" 23 | compile "${build_root}_neon_fma" "-march=armv7-a -mfpu=neon-vfpv4" 24 | -------------------------------------------------------------------------------- /ci/build-linux-armv8-clang.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | function compile { 5 | build=$1 6 | mkdir $build 7 | cd $build 8 | cmake .. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-Wall -funroll-loops $2" -DMIPP_STATIC_LIB=ON 9 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 10 | make -j $THREADS 11 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 12 | cd .. 13 | } 14 | 15 | apt update 16 | apt install -y python3-pip 17 | pip3 install --user -r codegen/requirements.txt 18 | python3 codegen/gen_compress.py 19 | 20 | build_root=build_linux_armv8_clang 21 | compile "${build_root}_nointr" "-DMIPP_NO_INTRINSICS" 22 | compile "${build_root}_neon" "-march=armv8.1-a+simd" 23 | -------------------------------------------------------------------------------- /ci/build-linux-armv8-gcc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | function compile { 5 | build=$1 6 | mkdir $build 7 | cd $build 8 | cmake .. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions $2" -DMIPP_STATIC_LIB=ON 9 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 10 | make -j $THREADS 11 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 12 | cd .. 
13 | } 14 | 15 | apt update 16 | apt install -y python3-pip 17 | pip3 install --user -r codegen/requirements.txt 18 | python3 codegen/gen_compress.py 19 | 20 | build_root=build_linux_armv8_gcc 21 | compile "${build_root}_nointr" "-DMIPP_NO_INTRINSICS" 22 | compile "${build_root}_neon" "-march=armv8.1-a+simd" 23 | compile "${build_root}_sve_ls128" "-march=armv8-a+sve -msve-vector-bits=128" 24 | compile "${build_root}_sve_ls256" "-march=armv8-a+sve -msve-vector-bits=256" 25 | compile "${build_root}_sve_ls512" "-march=armv8-a+sve -msve-vector-bits=512" 26 | compile "${build_root}_sve_ls1024" "-march=armv8-a+sve -msve-vector-bits=1024" -------------------------------------------------------------------------------- /ci/build-linux-x86-clang.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | # compile <build_dir> <extra_cxx_flags>: configure and build one variant, exiting with the status of the first failing step. Only "-pie" is passed as linker flags: a duplicate -DCMAKE_EXE_LINKER_FLAGS requesting static linking used to precede it and was always overridden (CMake keeps the last -D). 4 | function compile { 5 | build=$1 6 | mkdir $build 7 | cd $build 8 | cmake .. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-Wall -funroll-loops $2" -DCMAKE_EXE_LINKER_FLAGS="-pie" -DMIPP_STATIC_LIB=ON 9 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 10 | make -j $THREADS 11 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 12 | cd .. 
13 | } 14 | 15 | apt update 16 | apt install -y python3-pip 17 | pip3 install --user -r codegen/requirements.txt 18 | python3 codegen/gen_compress.py 19 | 20 | build_root=build_linux_x86_clang 21 | compile "${build_root}_nointr" "-DMIPP_NO_INTRINSICS" 22 | compile "${build_root}_sse2" "-msse2" 23 | compile "${build_root}_sse3" "-msse3" 24 | compile "${build_root}_ssse3" "-mssse3" 25 | compile "${build_root}_sse4_1" "-msse4.1" 26 | compile "${build_root}_sse4_2" "-msse4.2" 27 | compile "${build_root}_avx" "-mavx" 28 | compile "${build_root}_avx2" "-mavx2" 29 | compile "${build_root}_avx2_bmi2" "-mavx2 -mbmi2" 30 | compile "${build_root}_avx2_bmi2_fma" "-mavx2 -mbmi2 -mfma" 31 | compile "${build_root}_avx512f" "-mavx512f" 32 | compile "${build_root}_avx512bw" "-mavx512f -mavx512bw" 33 | compile "${build_root}_avx512vbmi2" "-mavx512f -mavx512bw -mavx512vbmi -mavx512vbmi2" 34 | -------------------------------------------------------------------------------- /ci/build-linux-x86-gcc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | function compile { 5 | build=$1 6 | mkdir $build 7 | cd $build 8 | cmake .. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions $2" -DCMAKE_EXE_LINKER_FLAGS="-static -static-libgcc -static-libstdc++" -DMIPP_STATIC_LIB=ON 9 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 10 | make -j $THREADS 11 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 12 | cd .. 
13 | } 14 | 15 | apt update 16 | apt install -y python3-pip 17 | pip3 install --user -r codegen/requirements.txt 18 | python3 codegen/gen_compress.py 19 | 20 | build_root=build_linux_x86_gcc 21 | compile "${build_root}_nointr" "-DMIPP_NO_INTRINSICS" 22 | compile "${build_root}_sse2" "-msse2" 23 | compile "${build_root}_sse3" "-msse3" 24 | compile "${build_root}_ssse3" "-mssse3" 25 | compile "${build_root}_sse4_1" "-msse4.1" 26 | compile "${build_root}_sse4_2" "-msse4.2" 27 | compile "${build_root}_avx" "-mavx" 28 | compile "${build_root}_avx2" "-mavx2" 29 | compile "${build_root}_avx2_bmi2" "-mavx2 -mbmi2" 30 | compile "${build_root}_avx2_bmi2_fma" "-mavx2 -mbmi2 -mfma" 31 | compile "${build_root}_avx512f" "-mavx512f" 32 | compile "${build_root}_avx512bw" "-mavx512f -mavx512bw" 33 | compile "${build_root}_avx512vbmi2" "-mavx512f -mavx512bw -mavx512vbmi -mavx512vbmi2" 34 | -------------------------------------------------------------------------------- /ci/build-linux-x86-icpc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | function compile { 5 | build=$1 6 | mkdir $build 7 | cd $build 8 | cmake .. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=icpc -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions -std=c++11 $2" -DCMAKE_EXE_LINKER_FLAGS="-static -static-libgcc -static-libstdc++" -DMIPP_STATIC_LIB=ON 9 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 10 | make -j $THREADS 11 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 12 | cd .. 
13 | } 14 | 15 | pip3 install --user -r codegen/requirements.txt 16 | python3 codegen/gen_compress.py 17 | 18 | source /opt/intel/vars-intel.sh 19 | 20 | build_root=build_linux_x86_icpc 21 | compile "${build_root}_nointr" "-DMIPP_NO_INTRINSICS" 22 | compile "${build_root}_sse2" "-msse2" 23 | compile "${build_root}_sse3" "-msse3" 24 | compile "${build_root}_ssse3" "-mssse3" 25 | compile "${build_root}_sse4_1" "-msse4.1" 26 | compile "${build_root}_sse4_2" "-msse4.2" 27 | compile "${build_root}_avx" "-mavx" 28 | compile "${build_root}_avx2_fma" "-march=core-avx2 -fma" 29 | # compile "${build_root}_avx512mic" "-xMIC-AVX512" 30 | compile "${build_root}_avx512f" "-xCOMMON-AVX512" 31 | compile "${build_root}_avx512bw" "-xCORE-AVX512" -------------------------------------------------------------------------------- /ci/build-macos-x86-clang.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | function compile { 5 | build=$1 6 | mkdir $build 7 | cd $build 8 | cmake .. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions $2" -DMIPP_STATIC_LIB=ON 9 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 10 | make 11 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 12 | cd .. 
13 | } 14 | 15 | apt update 16 | apt install -y python3-pip 17 | pip3 install --user -r codegen/requirements.txt 18 | python3 codegen/gen_compress.py 19 | 20 | build_root=build_macos_x86_clang 21 | compile "${build_root}_nointr" "-DMIPP_NO_INTRINSICS" 22 | compile "${build_root}_sse2" "-msse2" 23 | compile "${build_root}_sse3" "-msse3" 24 | compile "${build_root}_ssse3" "-mssse3" 25 | compile "${build_root}_sse4_1" "-msse4.1" 26 | compile "${build_root}_sse4_2" "-msse4.2" 27 | compile "${build_root}_avx" "-mavx" 28 | compile "${build_root}_avx2" "-mavx2" 29 | compile "${build_root}_avx2_fma" "-mavx2 -mfma" -------------------------------------------------------------------------------- /ci/build-windows-x86-gcc.bat: -------------------------------------------------------------------------------- 1 | @echo on 2 | 3 | call ./ci/tools/threads.bat 4 | 5 | rem set build_root=build_windows_x86_gcc 6 | call :compile "build_windows_x86_gcc_nointr" "-Wno-attributes -DMIPP_NO_INTRINSICS" 7 | call :compile "build_windows_x86_gcc_sse2" "-Wno-attributes -msse2" 8 | call :compile "build_windows_x86_gcc_sse3" "-Wno-attributes -msse3" 9 | call :compile "build_windows_x86_gcc_ssse3" "-Wno-attributes -mssse3" 10 | call :compile "build_windows_x86_gcc_sse4_1" "-Wno-attributes -msse4.1" 11 | call :compile "build_windows_x86_gcc_sse4_2" "-Wno-attributes -msse4.2" 12 | call :compile "build_windows_x86_gcc_avx" "-Wno-attributes -mavx" 13 | call :compile "build_windows_x86_gcc_avx2" "-Wno-attributes -mavx2" 14 | call :compile "build_windows_x86_gcc_avx2_fma" "-Wno-attributes -mavx2 -mfma" 15 | rem call :compile "build_windows_x86_gcc_avx512f" "-mavx512f" 16 | 17 | exit /B %ERRORLEVEL% 18 | 19 | :compile 20 | set build=%~1% 21 | set params=%~2% 22 | mkdir %build% 23 | cd %build% 24 | cmake .. 
-G"MinGW Makefiles" -DCMAKE_CXX_COMPILER=g++.exe -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions %params%" -DMIPP_STATIC_LIB=OFF 25 | if %ERRORLEVEL% neq 0 exit %ERRORLEVEL% 26 | mingw32-make -j %THREADS% 27 | if %ERRORLEVEL% neq 0 exit %ERRORLEVEL% 28 | cd .. 29 | exit /B 0 -------------------------------------------------------------------------------- /ci/build-windows-x86-msvc.bat: -------------------------------------------------------------------------------- 1 | @echo on 2 | 3 | call ./ci/tools/threads.bat 4 | 5 | set "VSCMD_START_DIR=%CD%" 6 | call "%VS_PATH%\VC\Auxiliary\Build\vcvars64.bat" 7 | 8 | rem set build_root=build_windows_x86_msvc 9 | call :compile "build_windows_x86_msvc_nointr" "-DMIPP_NO_INTRINSICS" 10 | call :compile "build_windows_x86_msvc_sse2" "/arch:SSE2 -D__SSE__ -D__SSE2__" 11 | call :compile "build_windows_x86_msvc_sse3" "/arch:SSE3 -D__SSE__ -D__SSE2__ -D__SSE3__" 12 | call :compile "build_windows_x86_msvc_ssse3" "/arch:SSSE3 -D__SSE__ -D__SSE2__ -D__SSE3__ -D__SSSE3__" 13 | call :compile "build_windows_x86_msvc_sse4_1" "/arch:SSE4.1 -D__SSE__ -D__SSE2__ -D__SSE3__ -D__SSSE3__ -D__SSE4_1__" 14 | call :compile "build_windows_x86_msvc_sse4_2" "/arch:SSE4.2 -D__SSE__ -D__SSE2__ -D__SSE3__ -D__SSSE3__ -D__SSE4_1__ -D__SSE4_2__" 15 | call :compile "build_windows_x86_msvc_avx" "/arch:AVX -D__SSE__ -D__SSE2__ -D__SSE3__ -D__SSSE3__ -D__SSE4_1__ -D__SSE4_2__ -D__AVX__" 16 | call :compile "build_windows_x86_msvc_avx2" "/arch:AVX2 -D__SSE__ -D__SSE2__ -D__SSE3__ -D__SSSE3__ -D__SSE4_1__ -D__SSE4_2__ -D__AVX__ -D__AVX2__" 17 | call :compile "build_windows_x86_msvc_avx2_fma" "/arch:AVX2 -D__SSE__ -D__SSE2__ -D__SSE3__ -D__SSSE3__ -D__SSE4_1__ -D__SSE4_2__ -D__AVX__ -D__AVX2__ -D__FMA__" 18 | rem call :compile "build_windows_x86_msvc_avx512f" "/arch:AVX512" 19 | 20 | exit /B %ERRORLEVEL% 21 | 22 | :compile 23 | set build=%~1% 24 | set params=%~2% 25 | mkdir %build% 26 | cd %build% 27 | cmake .. 
-G"Visual Studio 15 2017 Win64" -DCMAKE_CXX_FLAGS="-D_CRT_SECURE_NO_DEPRECATE /EHsc /MP%THREADS% %params%" -DMIPP_STATIC_LIB=OFF -DMIPP_EXAMPLES_EXE=OFF 28 | if %ERRORLEVEL% neq 0 exit %ERRORLEVEL% 29 | rem devenv /build Release MIPP.sln 30 | msbuild MIPP.sln /t:Build /p:Configuration=Release 31 | if %ERRORLEVEL% neq 0 exit %ERRORLEVEL% 32 | move bin\Release\* bin\ 33 | rmdir bin\Release\ 34 | cd .. 35 | exit /B 0 -------------------------------------------------------------------------------- /ci/coverage-linux.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | cd tests 5 | mkdir code_coverage_report || true 6 | 7 | genhtml code_coverage_files/* --output-directory ./code_coverage_report/ 8 | -------------------------------------------------------------------------------- /ci/test-linux-intel-sde-unit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | if [ -z "$INTEL_SDE_ARCH" ] 5 | then 6 | echo "Please define the 'INTEL_SDE_ARCH' environment variable (ex.: -skx)." 7 | exit 1 8 | fi 9 | 10 | # install Intel SDE emulator -------------------------------------------------- 11 | apt update 12 | apt install -y wget xz-utils 13 | echo 0 > /proc/sys/kernel/yama/ptrace_scope 14 | WD=$(pwd) 15 | mkdir softwares 16 | cd softwares 17 | wget https://largo.lip6.fr/monolithe/downloads/sde-external-9.33.0-2024-01-07-lin.tar.xz 18 | tar -xvvf sde-external-9.33.0-2024-01-07-lin.tar.xz 19 | ln -s $WD/softwares/sde-external-9.33.0-2024-01-07-lin $WD/softwares/sde 20 | export PATH=$WD/softwares/sde:$PATH 21 | cd .. 22 | # ----------------------------------------------------------------------------- 23 | 24 | for build in "$@" 25 | do 26 | cd $build 27 | sde64 $INTEL_SDE_ARCH -- ./bin/run-tests 28 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 29 | cd .. 
30 | done 31 | -------------------------------------------------------------------------------- /ci/test-linux-macos-unit.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | 4 | for build in "$@" 5 | do 6 | cd $build 7 | if [[ "$build" == *"sve"* ]]; then 8 | source /usr/share/modules/init/profile.sh 9 | module load armie22/22.0 10 | nbits=$(echo $build | grep -Eo '[0-9]+(\.[0-9]+)?' | tail -n 1) 11 | if [ -z "$nbits" ] 12 | then 13 | echo "The build name is incompatible with SVE build, it should contain the SIMD size (current wrong build name is '$build', an example of expected build name is: 'build_coverage_linux_armv8_gcc_sve_ls256')." 14 | exit 1 15 | fi 16 | # armie -msve-vector-bits=$nbits -- ./bin/run_tests 2>&1 | tee sve_$nbits.txt 17 | # RES=$(cat sve_$nbits.txt | tail -3 | head -n 1) 18 | # if [[ "$RES" != *"test cases: 289 | 92 passed | 197 failed"* ]]; then 19 | # exit 1; 20 | # fi 21 | armie -msve-vector-bits=$nbits -- ./bin/run-tests 22 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 23 | else 24 | ./bin/run-tests 25 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 26 | fi 27 | cd .. 28 | done 29 | -------------------------------------------------------------------------------- /ci/test-windows-unit.bat: -------------------------------------------------------------------------------- 1 | @echo on 2 | 3 | :Loop 4 | IF "%~1"=="" goto End 5 | 6 | set build=%~1% 7 | cd %build%/bin/ 8 | run-tests.exe 9 | if %ERRORLEVEL% neq 0 exit %ERRORLEVEL% 10 | cd ../../ 11 | 12 | shift 13 | goto Loop 14 | 15 | :End 16 | -------------------------------------------------------------------------------- /ci/tools/build-linux-x86-gcc-generic.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #set -x 3 | 4 | function compile { 5 | 6 | cd tests 7 | build_root=build_linux_x86_gcc 8 | build="${build_root}_$1" 9 | mkdir $build 10 | cd $build 11 | cmake .. 
-G"Unix Makefiles" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions $2" 12 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 13 | make -j $THREADS 14 | rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi 15 | cd ../.. 16 | } 17 | -------------------------------------------------------------------------------- /ci/tools/threads.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | IF "%THREADS%" NEQ "" goto End 4 | set "THREADS=%NUMBER_OF_PROCESSORS%" 5 | IF "%THREADS%"=="" set "THREADS=1" 6 | 7 | :End -------------------------------------------------------------------------------- /ci/tools/threads.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # set -x 3 | 4 | if [ -z "$THREADS" ]; then 5 | if [ ! -f /proc/cpuinfo ]; then 6 | export THREADS=1 7 | else 8 | THREADS=$(grep -c ^processor /proc/cpuinfo) 9 | export THREADS 10 | fi 11 | fi 12 | -------------------------------------------------------------------------------- /cmake/MIPPConfig.cmake.in: -------------------------------------------------------------------------------- 1 | set(MIPP_VERSION @MIPP_VERSION@) 2 | 3 | @PACKAGE_INIT@ 4 | 5 | set_and_check(MIPP_INC_DIR "@PACKAGE_INC_INSTALL_DIR@") 6 | 7 | # Add the targets file 8 | include("${CMAKE_CURRENT_LIST_DIR}/mippTargets.cmake") 9 | -------------------------------------------------------------------------------- /cmake/cmake_uninstall.cmake.in: -------------------------------------------------------------------------------- 1 | # http://www.vtk.org/Wiki/CMake_FAQ#Can_I_do_.22make_uninstall.22_with_CMake.3F 2 | 3 | IF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 4 | MESSAGE(FATAL_ERROR "Cannot find install manifest: \"@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt\"") 5 | ENDIF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt") 6 | # Remove every file recorded in the install manifest; abort on the first failed removal. 7 | FILE(READ
"@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files) 8 | STRING(REGEX REPLACE "\n" ";" files "${files}") 9 | FOREACH(file ${files}) 10 | MESSAGE(STATUS "Uninstalling \"$ENV{DESTDIR}${file}\"") 11 | IF(EXISTS "$ENV{DESTDIR}${file}") 12 | EXECUTE_PROCESS( # replaces the deprecated EXEC_PROGRAM; arguments are passed directly, not via a shell string 13 | COMMAND "@CMAKE_COMMAND@" -E remove "$ENV{DESTDIR}${file}" 14 | OUTPUT_VARIABLE rm_out 15 | RESULT_VARIABLE rm_retval 16 | ) 17 | IF(NOT "${rm_retval}" STREQUAL 0) 18 | MESSAGE(FATAL_ERROR "Problem when removing \"$ENV{DESTDIR}${file}\"") 19 | ENDIF(NOT "${rm_retval}" STREQUAL 0) 20 | ELSEIF(IS_SYMLINK "$ENV{DESTDIR}${file}") 21 | EXECUTE_PROCESS( # dangling symlinks fail EXISTS but must still be removed 22 | COMMAND "@CMAKE_COMMAND@" -E remove "$ENV{DESTDIR}${file}" 23 | OUTPUT_VARIABLE rm_out 24 | RESULT_VARIABLE rm_retval 25 | ) 26 | IF(NOT "${rm_retval}" STREQUAL 0) 27 | MESSAGE(FATAL_ERROR "Problem when removing \"$ENV{DESTDIR}${file}\"") 28 | ENDIF(NOT "${rm_retval}" STREQUAL 0) 29 | ELSE(EXISTS "$ENV{DESTDIR}${file}") 30 | MESSAGE(STATUS "File \"$ENV{DESTDIR}${file}\" does not exist.") 31 | ENDIF(EXISTS "$ENV{DESTDIR}${file}") 32 | ENDFOREACH(file) 33 | -------------------------------------------------------------------------------- /codegen/gen_compress.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from jinja2 import Environment, FileSystemLoader 4 | from pathlib import Path 5 | import sys 6 | 7 | if sys.version_info[0] != 3 or sys.version_info[1] < 5: 8 | print("This script requires Python version 3.5") 9 | sys.exit(1) 10 | 11 | env = Environment(loader = FileSystemLoader("codegen")) 12 | template_lut = env.get_template("vcompress-LUT.cpp.j2") 13 | template_file = env.get_template("mipp_LUT.cpp.j2") 14 | 15 | def generate_lut(entries, simdwidth, words_per_simd): 16 | lut = [[0 for j in range(0, simdwidth)] for i in range(0, entries)] 17 | 18 | elem_bytes = simdwidth // words_per_simd 19 | 20 | for i in range(0, entries): 21 | mask = i 22 | j = 0 23 | 24 | for k in range(0, words_per_simd): 25
| for b in range(0, elem_bytes): 26 | lut[i][j + b] = k * elem_bytes + b 27 | 28 | if mask & 1 == 1: 29 | j += elem_bytes 30 | mask >>= 1 31 | 32 | for k in range(j, simdwidth): 33 | lut[i][k] = -1 34 | 35 | return lut 36 | 37 | def write_all_luts(filename, all_luts): 38 | all_content = template_file.render( 39 | luts = all_luts, 40 | ) 41 | 42 | with open(filename, "w+") as file: 43 | file.write(all_content) 44 | 45 | def generate_luts(filename, simdname, simdwidth, entrydef, lut_params_list): 46 | (entrytype, entrybytes) = entrydef 47 | 48 | all_luts = [] 49 | for entries, simd_words in lut_params_list: 50 | 51 | elem_bits = (simdwidth // simd_words) * entrybytes * 8 52 | 53 | lut = template_lut.render( 54 | lutname = f"vcompress_LUT{elem_bits}x{simd_words}_{simdname}", 55 | entries = entries, 56 | simdwidth = simdwidth, 57 | entrytype = entrytype, 58 | lut = generate_lut(entries, simdwidth, simd_words) 59 | ) 60 | 61 | all_luts += [lut] 62 | 63 | write_all_luts(filename, all_luts) 64 | 65 | def generate_AVX_luts(filename): 66 | lut_AVX_32x8 = template_lut.render( 67 | lutname = f"vcompress_LUT32x8_AVX", 68 | entries = 256, 69 | simdwidth = 8, 70 | entrytype = "int32_t", 71 | lut = generate_lut(256, 8, 8) 72 | ) 73 | 74 | lut_AVX_64x4 = template_lut.render( 75 | lutname = "vcompress_LUT64x4_AVX", 76 | entries = 16, 77 | simdwidth = 8, 78 | entrytype = "int32_t", 79 | lut = generate_lut(16, 8, 4) 80 | ) 81 | 82 | all_luts = [lut_AVX_32x8, lut_AVX_64x4] 83 | 84 | write_all_luts(filename, all_luts) 85 | pass 86 | 87 | Path("src/gen").mkdir(parents=True, exist_ok=True) 88 | 89 | generate_luts("src/gen/compress_LUT_SSE.cpp", "SSE", 16, ("int8_t", 1), [(4, 2), (16, 4), (256, 8), (65536, 16)]) 90 | generate_luts("src/gen/compress_LUT_NEON.cpp", "NEON", 16, ("int8_t", 1), [(4, 2), (16, 4), (256, 8), (65536, 16)]) 91 | 92 | #generate_luts("src/gen/mipp_compress_LUT_AVX.cpp", "AVX", 8, ("int32_t", 4), [(256, 8)]) 93 | generate_AVX_luts("src/gen/compress_LUT_AVX.cpp") 94 | 
-------------------------------------------------------------------------------- /codegen/mipp_LUT.cpp.j2: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace mipp { 4 | 5 | {% for lut in luts -%} 6 | {{ lut }} 7 | 8 | {% endfor %} 9 | 10 | } 11 | -------------------------------------------------------------------------------- /codegen/requirements.txt: -------------------------------------------------------------------------------- 1 | jinja2==3.0.0 2 | -------------------------------------------------------------------------------- /codegen/vcompress-LUT.cpp.j2: -------------------------------------------------------------------------------- 1 | alignas(32) 2 | {{ entrytype }} {{ lutname }}[{{ entries }}][{{ simdwidth }}] = { 3 | {%- for i in range(0, entries) %} 4 | { 5 | {%- for j in range(0, simdwidth) -%} 6 | {{ "%3d" | format(lut[i][j]) }}, 7 | {%- endfor -%} 8 | }, 9 | {%- endfor %} 10 | }; 11 | -------------------------------------------------------------------------------- /examples/conversion.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | int main(int argc, char** argv) 8 | { 9 | // ------------------------------------------------------------------------ 10 | std::cout << "MIPP example" << std::endl; 11 | std::cout << "------------" << std::endl; 12 | std::cout << " - Instr. type: " << mipp::InstructionType << std::endl; 13 | std::cout << " - Instr. full type: " << mipp::InstructionFullType << std::endl; 14 | std::cout << " - Instr. version: " << mipp::InstructionVersion << std::endl; 15 | std::cout << " - Reg. size: " << mipp::RegisterSizeBit << " bits" << std::endl; 16 | std::cout << " - Reg. lanes: " << mipp::Lanes << std::endl; 17 | std::cout << " - 64-bit support: " << (mipp::Support64Bit ? "yes" : "no") << std::endl; 18 | std::cout << " - Byte/word support: " << (mipp::SupportByteWord ? 
"yes" : "no") << std::endl; 19 | auto ext = mipp::InstructionExtensions(); 20 | if (ext.size() > 0) 21 | { 22 | std::cout << " - Instr. extensions: {"; 23 | for (auto i = 0; i < (int)ext.size(); i++) 24 | std::cout << ext[i] << (i < ((int)ext.size() -1) ? ", " : ""); 25 | std::cout << "}" << std::endl; 26 | } 27 | std::cout << std::endl; 28 | // ------------------------------------------------------------------------ 29 | 30 | std::random_device rd; 31 | std::mt19937 g(rd()); 32 | 33 | signed char t_1[mipp::N()]; 34 | for (auto i = 0; i < mipp::N(); i++) t_1[i] = i+1; 35 | // std::shuffle(t_1, t_1 + mipp::N(), g); 36 | 37 | mipp::Reg in_1; in_1.loadu(t_1); 38 | 39 | std::cout << "---------------------------- int8" << std::endl << std::endl; 40 | 41 | std::cout << "Input vectors (signed char): " << std::endl; 42 | std::cout << "in_1 = " << in_1 << std::endl; 43 | std::cout << std::endl; 44 | 45 | std::cout << "--------------------------- int16" << std::endl << std::endl; 46 | 47 | auto low = in_1.low (); 48 | auto high = in_1.high(); 49 | 50 | auto low_short = low.template cvt(); 51 | std::cout << "Output vectors (low.cvt()): " << std::endl; 52 | std::cout << "low = " << low_short << std::endl; 53 | std::cout << std::endl; 54 | 55 | auto high_short = high.template cvt(); 56 | std::cout << "Output vectors (high.cvt()): " << std::endl; 57 | std::cout << "high = " << high_short << std::endl; 58 | std::cout << std::endl; 59 | 60 | std::cout << "--------------------------- int32" << std::endl << std::endl; 61 | 62 | auto low_low_short = low_short.low(); 63 | auto high_low_short = low_short.high(); 64 | 65 | auto low_low_int = low_low_short.template cvt(); 66 | std::cout << "Output vectors (low_low_short.cvt()): " << std::endl; 67 | std::cout << "low = " << low_low_int << std::endl; 68 | std::cout << std::endl; 69 | 70 | auto high_low_int = high_low_short.template cvt(); 71 | std::cout << "Output vectors (high_low_short.cvt()): " << std::endl; 72 | std::cout << "high = "
<< high_low_int << std::endl; 73 | std::cout << std::endl; 74 | 75 | auto low_high_short = high_short.low(); 76 | auto high_high_short = high_short.high(); 77 | 78 | auto low_high_int = low_high_short.template cvt(); 79 | std::cout << "Output vectors (low_high_short.cvt()): " << std::endl; 80 | std::cout << "low = " << low_high_int << std::endl; 81 | std::cout << std::endl; 82 | 83 | auto high_high_int = high_high_short.template cvt(); 84 | std::cout << "Output vectors (high_high_short.cvt()): " << std::endl; 85 | std::cout << "high = " << high_high_int << std::endl; 86 | std::cout << std::endl; 87 | 88 | std::cout << "------------------------- float32" << std::endl << std::endl; 89 | 90 | auto low_low_float = low_low_int.template cvt() + 0.1f; 91 | std::cout << "Output vectors (low_low_int.cvt() + 0.1f): " << std::endl; 92 | std::cout << "p1 = " << low_low_float << std::endl; 93 | std::cout << std::endl; 94 | 95 | auto high_low_float = high_low_int.template cvt() + 0.1f; 96 | std::cout << "Output vectors (high_low_int.cvt() + 0.1f): " << std::endl; 97 | std::cout << "p2 = " << high_low_float << std::endl; 98 | std::cout << std::endl; 99 | 100 | auto low_high_float = low_high_int.template cvt() + 0.1f; 101 | std::cout << "Output vectors (low_high_int.cvt() + 0.1f): " << std::endl; 102 | std::cout << "p3 = " << low_high_float << std::endl; 103 | std::cout << std::endl; 104 | 105 | auto high_high_float = high_high_int.template cvt() + 0.1f; 106 | std::cout << "Output vectors (high_high_int.cvt() + 0.1f): " << std::endl; 107 | std::cout << "p4 = " << high_high_float << std::endl; 108 | std::cout << std::endl; 109 | 110 | return 0; 111 | } 112 | -------------------------------------------------------------------------------- /examples/gemm.cpp: -------------------------------------------------------------------------------- 1 | #ifndef BLOCKI 2 | #define BLOCKI 64 3 | #endif 4 | #ifndef BLOCKJ 5 | #define BLOCKJ 64 6 | #endif 7 | #ifndef BLOCKK 8 | #define BLOCKK 256 9 
| #endif 10 | 11 | #ifndef TYPE 12 | #define TYPE double 13 | #endif 14 | #include 15 | #include 16 | 17 | extern "C" { 18 | TYPE k_1x1x1(TYPE *vA, TYPE *vB, TYPE *vC); // return type must match the definition below for every TYPE 19 | } 20 | TYPE k_1x1x1(TYPE *vA, TYPE *vB, TYPE *vC) { 21 | const int nv=mipp::N(); 22 | TYPE *B=&vB[0]; 23 | TYPE *C=&vC[0]; 24 | TYPE *A=&vA[0]; 25 | for (int i=0;i c00; 28 | c00.load(&C[(i+0)*BLOCKJ + j + (0)*nv]); 29 | for (int k=0;k a00; 31 | a00 = mipp::set1(A[(i+0)*BLOCKK+k+0]); 32 | mipp::Reg b00; 33 | b00.load(&B[(k+0)*BLOCKJ + j + (0)*nv]); 34 | c00 =mipp::fmadd(a00, b00,c00); 35 | } 36 | c00.store(&C[(i+0)*BLOCKJ + j + (0)*nv]); 37 | } 38 | } 39 | return C[0]; 40 | } 41 | 42 | int main(int argc, char** argv) 43 | { 44 | // -------------------------------------------------------------------------- 45 | std::cout << "MIPP example" << std::endl; 46 | std::cout << "------------" << std::endl; 47 | std::cout << " - Instr. type: " << mipp::InstructionType << std::endl; 48 | std::cout << " - Instr. full type: " << mipp::InstructionFullType << std::endl; 49 | std::cout << " - Instr. version: " << mipp::InstructionVersion << std::endl; 50 | std::cout << " - Reg. size: " << mipp::RegisterSizeBit << " bits" << std::endl; 51 | std::cout << " - Reg. lanes: " << mipp::Lanes << std::endl; 52 | std::cout << " - 64-bit support: " << (mipp::Support64Bit ? "yes" : "no") << std::endl; 53 | std::cout << " - Byte/word support: " << (mipp::SupportByteWord ? "yes" : "no") << std::endl; 54 | auto ext = mipp::InstructionExtensions(); 55 | if (ext.size() > 0) 56 | { 57 | std::cout << " - Instr. extensions: {"; 58 | for (auto i = 0; i < (int)ext.size(); i++) 59 | std::cout << ext[i] << (i < ((int)ext.size() -1) ?
", " : ""); 60 | std::cout << "}" << std::endl; 61 | } 62 | std::cout << std::endl; 63 | // -------------------------------------------------------------------------- 64 | 65 | mipp::vector A(BLOCKI*BLOCKK, 1); 66 | mipp::vector B(BLOCKK*BLOCKJ, 2); 67 | mipp::vector C(BLOCKI*BLOCKJ, 3); 68 | TYPE s=0; 69 | for (int i=0;i<100;i++) 70 | s+=k_1x1x1(&A[0],&B[0],&C[0]); 71 | std::cout << s << std::endl; 72 | return 0; 73 | } 74 | 75 | -------------------------------------------------------------------------------- /examples/initreg.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | int main(int argc, char** argv) 8 | { 9 | // ------------------------------------------------------------------------ 10 | std::cout << "MIPP example" << std::endl; 11 | std::cout << "------------" << std::endl; 12 | std::cout << " - Instr. type: " << mipp::InstructionType << std::endl; 13 | std::cout << " - Instr. full type: " << mipp::InstructionFullType << std::endl; 14 | std::cout << " - Instr. version: " << mipp::InstructionVersion << std::endl; 15 | std::cout << " - Reg. size: " << mipp::RegisterSizeBit << " bits" << std::endl; 16 | std::cout << " - Reg. lanes: " << mipp::Lanes << std::endl; 17 | std::cout << " - 64-bit support: " << (mipp::Support64Bit ? "yes" : "no") << std::endl; 18 | std::cout << " - Byte/word support: " << (mipp::SupportByteWord ? "yes" : "no") << std::endl; 19 | auto ext = mipp::InstructionExtensions(); 20 | if (ext.size() > 0) 21 | { 22 | std::cout << " - Instr. extensions: {"; 23 | for (auto i = 0; i < (int)ext.size(); i++) 24 | std::cout << ext[i] << (i < ((int)ext.size() -1) ? 
", " : ""); 25 | std::cout << "}" << std::endl; 26 | } 27 | std::cout << std::endl; 28 | // ------------------------------------------------------------------------ 29 | 30 | std::random_device rd; 31 | std::mt19937 g(rd()); 32 | 33 | using type = float; 34 | 35 | type t_1[mipp::N()]; 36 | for (auto i = 0; i < mipp::N(); i++) t_1[i] = i; 37 | std::shuffle(t_1, t_1 + mipp::N(), g); 38 | 39 | mipp::Reg in_1 = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; 40 | mipp::Reg in_2 = 12; 41 | mipp::Reg in_3 = t_1; 42 | 43 | std::cout << "Input vectors: " << std::endl; 44 | std::cout << "in_1 = " << in_1 << std::endl; 45 | std::cout << "in_2 = " << in_2 << std::endl; 46 | std::cout << "in_3 = " << in_3 << std::endl; 47 | 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /examples/mask.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | int main(int argc, char** argv) 8 | { 9 | // ------------------------------------------------------------------------ 10 | std::cout << "MIPP example" << std::endl; 11 | std::cout << "------------" << std::endl; 12 | std::cout << " - Instr. type: " << mipp::InstructionType << std::endl; 13 | std::cout << " - Instr. full type: " << mipp::InstructionFullType << std::endl; 14 | std::cout << " - Instr. version: " << mipp::InstructionVersion << std::endl; 15 | std::cout << " - Reg. size: " << mipp::RegisterSizeBit << " bits" << std::endl; 16 | std::cout << " - Reg. lanes: " << mipp::Lanes << std::endl; 17 | std::cout << " - 64-bit support: " << (mipp::Support64Bit ? "yes" : "no") << std::endl; 18 | std::cout << " - Byte/word support: " << (mipp::SupportByteWord ? "yes" : "no") << std::endl; 19 | auto ext = mipp::InstructionExtensions(); 20 | if (ext.size() > 0) 21 | { 22 | std::cout << " - Instr. 
extensions: {"; 23 | for (auto i = 0; i < (int)ext.size(); i++) 24 | std::cout << ext[i] << (i < ((int)ext.size() -1) ? ", " : ""); 25 | std::cout << "}" << std::endl; 26 | } 27 | std::cout << std::endl; 28 | // ------------------------------------------------------------------------ 29 | 30 | std::random_device rd; 31 | std::mt19937 g(rd()); 32 | 33 | using T = float; 34 | constexpr int N = mipp::N(); 35 | 36 | T t_1[N]; 37 | for (auto i = 0; i < N; i++) t_1[i] = i+1; 38 | std::shuffle(t_1, t_1 + N, g); 39 | 40 | T t_2[N]; 41 | for (auto i = 0; i < N; i++) t_2[i] = i+1; 42 | std::shuffle(t_2, t_2 + N, g); 43 | 44 | mipp::Reg in_1 = t_1; 45 | mipp::Reg in_2 = t_2; 46 | mipp::Reg in_3 = (T)-1; 47 | mipp::Msk m_1 = false; 48 | mipp::Msk m_2 = true; 49 | mipp::Msk m_3 = {true, false,true, true, false,true ,true ,true , 50 | false,false,false,false,false,true ,false,true , 51 | false,true ,true ,true ,false,true ,false,true , 52 | true ,true ,false,true ,false,false,false,false}; 53 | mipp::Msk m_4 = {false,true ,false,false,true ,false,false,false, 54 | true ,true ,true ,true ,true ,false,true ,false, 55 | true ,false,false,false,true ,false,true ,false, 56 | false,false,true ,false,true ,true ,true ,true }; 57 | 58 | std::cout << "Input vectors: " << std::endl; 59 | std::cout << "in_1 = " << in_1 << std::endl; 60 | std::cout << "in_2 = " << in_2 << std::endl; 61 | std::cout << "in_3 = " << in_3 << std::endl; 62 | std::cout << "m_1 = " << m_1 << std::endl; 63 | std::cout << "m_2 = " << m_2 << std::endl; 64 | std::cout << "m_3 = " << m_3 << std::endl; 65 | std::cout << "m_4 = " << m_4 << std::endl; 66 | std::cout << std::endl; 67 | 68 | std::cout << "r_1 = m_1.toReg(): " << std::endl; 69 | mipp::Reg r_1 = m_1.toReg(); 70 | std::cout << "r_1 = " << r_1 << std::endl; 71 | std::cout << std::endl; 72 | 73 | std::cout << "r_2 = m_2.toReg(): " << std::endl; 74 | mipp::Reg r_2 = m_2.toReg(); 75 | std::cout << "r_2 = " << r_2 << std::endl; 76 | std::cout << std::endl; 77 | 78 
| std::cout << "Output vector (mout = m_3; mout.set0()): " << std::endl; 79 | auto mout = m_3; 80 | mout.set0(); 81 | std::cout << "mout = " << mout << std::endl; 82 | std::cout << std::endl; 83 | 84 | std::cout << "Output vector (m_3 & (in_1 - in_2)): " << std::endl; 85 | auto out = mipp::maskz(m_3, in_1, in_2); 86 | std::cout << "out = " << out << std::endl; 87 | std::cout << std::endl; 88 | 89 | std::cout << "Output mask (in_1 > in_2): " << std::endl; 90 | auto m_5 = in_1 > in_2; 91 | std::cout << "m_5 = " << m_5 << std::endl; 92 | std::cout << std::endl; 93 | 94 | std::cout << "Output vector ((m_5 & (in_1 + in_2)) | (~m_5 & in_3)): " << std::endl; 95 | out = mipp::mask(m_5, in_3, in_1, in_2); 96 | std::cout << "out = " << out << std::endl; 97 | std::cout << std::endl; 98 | 99 | std::cout << "Output vector (m_3 ^ m_4): " << std::endl; 100 | mout = m_3 ^ m_4; // XOR, matching the label printed just above 101 | std::cout << "mout = " << mout << std::endl; 102 | std::cout << std::endl; 103 | 104 | std::cout << "Output vector (m_3 | m_4): " << std::endl; 105 | mout = m_3 | m_4; // OR, matching the label printed just above 106 | std::cout << "mout = " << mout << std::endl; 107 | std::cout << std::endl; 108 | 109 | std::cout << "Output vector (m_3 & m_4): " << std::endl; 110 | mout = m_3 & m_4; 111 | std::cout << "mout = " << mout << std::endl; 112 | std::cout << std::endl; 113 | 114 | std::cout << "Output vector (~mout): " << std::endl; 115 | mout = ~mout; 116 | std::cout << "mout = " << mout << std::endl; 117 | std::cout << std::endl; 118 | 119 | std::cout << "Output vector (in_1 - in_2): " << std::endl; 120 | auto out2 = in_1 - in_2; 121 | std::cout << "out2 = " << out2 << std::endl; 122 | std::cout << std::endl; 123 | 124 | std::cout << "Output vector (mipp::sign(out2)): " << std::endl; 125 | mout = mipp::sign(out2); 126 | std::cout << "mout = " << mout << std::endl; 127 | std::cout << std::endl; 128 | 129 | std::cout << "Output vector (mipp::neg(out2, mout)): " << std::endl; 130 | out2 = mipp::neg(out2, mout); 131 | std::cout << "out2 = " << out2 <<
std::endl; 132 | std::cout << std::endl; 133 | 134 | #if !defined(MIPP_NEON) 135 | std::cout << "Output vector (m_2 << 0): " << std::endl; 136 | mout = m_2 << 0; 137 | std::cout << "mout = " << mout << std::endl; 138 | std::cout << std::endl; 139 | 140 | std::cout << "Output vector (m_2 << 1): " << std::endl; 141 | mout = m_2 << 1; 142 | std::cout << "mout = " << mout << std::endl; 143 | std::cout << std::endl; 144 | 145 | std::cout << "Output vector (m_2 >> 2): " << std::endl; 146 | mout = m_2 >> 2; 147 | std::cout << "mout = " << mout << std::endl; 148 | std::cout << std::endl; 149 | #endif 150 | 151 | return 0; 152 | } 153 | -------------------------------------------------------------------------------- /examples/mathfun.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | int main(int argc, char** argv) 8 | { 9 | // ------------------------------------------------------------------------ 10 | std::cout << "MIPP example" << std::endl; 11 | std::cout << "------------" << std::endl; 12 | std::cout << " - Instr. type: " << mipp::InstructionType << std::endl; 13 | std::cout << " - Instr. full type: " << mipp::InstructionFullType << std::endl; 14 | std::cout << " - Instr. version: " << mipp::InstructionVersion << std::endl; 15 | std::cout << " - Reg. size: " << mipp::RegisterSizeBit << " bits" << std::endl; 16 | std::cout << " - Reg. lanes: " << mipp::Lanes << std::endl; 17 | std::cout << " - 64-bit support: " << (mipp::Support64Bit ? "yes" : "no") << std::endl; 18 | std::cout << " - Byte/word support: " << (mipp::SupportByteWord ? "yes" : "no") << std::endl; 19 | auto ext = mipp::InstructionExtensions(); 20 | if (ext.size() > 0) 21 | { 22 | std::cout << " - Instr. extensions: {"; 23 | for (auto i = 0; i < (int)ext.size(); i++) 24 | std::cout << ext[i] << (i < ((int)ext.size() -1) ? 
", " : ""); 25 | std::cout << "}" << std::endl; 26 | } 27 | std::cout << std::endl; 28 | // ------------------------------------------------------------------------ 29 | 30 | std::random_device rd; 31 | std::mt19937 g(rd()); 32 | 33 | using type = float; 34 | 35 | type t_1[mipp::N()]; 36 | for (auto i = 0; i < mipp::N(); i++) t_1[i] = 1; 37 | std::shuffle(t_1, t_1 + mipp::N(), g); 38 | 39 | type t_2[mipp::N()]; 40 | for (auto i = 0; i < mipp::N(); i++) t_2[i] = i+1; 41 | std::shuffle(t_2, t_2 + mipp::N(), g); 42 | 43 | type t_3[mipp::N()]; 44 | for (auto i = 0; i < mipp::N(); i++) t_3[i] = 3.14; 45 | std::shuffle(t_3, t_3 + mipp::N(), g); 46 | 47 | mipp::Reg in_1; in_1.loadu(t_1); 48 | mipp::Reg in_2; in_2.loadu(t_2); 49 | mipp::Reg in_3; in_3.loadu(t_3); 50 | 51 | std::cout << "Input vectors: " << std::endl; 52 | std::cout << "in_1 = " << in_1 << std::endl; 53 | std::cout << "in_2 = " << in_2 << std::endl; 54 | std::cout << std::endl; 55 | 56 | auto out = in_1.log(); 57 | std::cout << "Output vectors (in_1.log()): " << std::endl; 58 | std::cout << "out = " << out << std::endl; 59 | std::cout << std::endl; 60 | 61 | out = mipp::exp(in_1); 62 | std::cout << "Output vectors (mipp::exp(in_1)): " << std::endl; 63 | std::cout << "out = " << out << std::endl; 64 | std::cout << std::endl; 65 | 66 | out = in_2.exp(); 67 | std::cout << "Output vectors (in_2.exp()): " << std::endl; 68 | std::cout << "out = " << out << std::endl; 69 | std::cout << std::endl; 70 | 71 | out = in_3.sin(); 72 | std::cout << "Output vectors (in_3.sin()): " << std::endl; 73 | std::cout << "out = " << out << std::endl; 74 | std::cout << std::endl; 75 | 76 | out = in_3.cos(); 77 | std::cout << "Output vectors (in_3.cos()): " << std::endl; 78 | std::cout << "out = " << out << std::endl; 79 | std::cout << std::endl; 80 | 81 | mipp::Reg rsin, rcos; 82 | in_3.sincos(rsin, rcos); 83 | std::cout << "Output vectors (in_3.sincos(rsin, rcos)): " << std::endl; 84 | std::cout << "rsin = " << rsin << std::endl; 
85 | std::cout << "rcos = " << rcos << std::endl; 86 | std::cout << std::endl; 87 | 88 | return 0; 89 | } 90 | -------------------------------------------------------------------------------- /examples/operator.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | 7 | int main(int argc, char** argv) 8 | { 9 | // ------------------------------------------------------------------------ 10 | std::cout << "MIPP example" << std::endl; 11 | std::cout << "------------" << std::endl; 12 | std::cout << " - Instr. type: " << mipp::InstructionType << std::endl; 13 | std::cout << " - Instr. full type: " << mipp::InstructionFullType << std::endl; 14 | std::cout << " - Instr. version: " << mipp::InstructionVersion << std::endl; 15 | std::cout << " - Reg. size: " << mipp::RegisterSizeBit << " bits" << std::endl; 16 | std::cout << " - Reg. lanes: " << mipp::Lanes << std::endl; 17 | std::cout << " - 64-bit support: " << (mipp::Support64Bit ? "yes" : "no") << std::endl; 18 | std::cout << " - Byte/word support: " << (mipp::SupportByteWord ? "yes" : "no") << std::endl; 19 | auto ext = mipp::InstructionExtensions(); 20 | if (ext.size() > 0) 21 | { 22 | std::cout << " - Instr. extensions: {"; 23 | for (auto i = 0; i < (int)ext.size(); i++) 24 | std::cout << ext[i] << (i < ((int)ext.size() -1) ? 
", " : ""); 25 | std::cout << "}" << std::endl; 26 | } 27 | std::cout << std::endl; 28 | // ------------------------------------------------------------------------ 29 | 30 | std::random_device rd; 31 | std::mt19937 g(rd()); 32 | 33 | using type = float; 34 | 35 | type t_1[mipp::N()]; 36 | for (auto i = 0; i < mipp::N(); i++) t_1[i] = i+1; 37 | std::shuffle(t_1, t_1 + mipp::N(), g); 38 | 39 | type t_2[mipp::N()]; 40 | for (auto i = 0; i < mipp::N(); i++) t_2[i] = i+1; 41 | std::shuffle(t_2, t_2 + mipp::N(), g); 42 | 43 | mipp::Reg in_1(t_1); 44 | mipp::Reg in_2(t_2); 45 | 46 | std::cout << "Input vectors: " << std::endl; 47 | std::cout << "in_1 = " << in_1 << std::endl; 48 | std::cout << "in_2 = " << in_2 << std::endl; 49 | std::cout << std::endl; 50 | 51 | auto out = in_1 + in_2; 52 | // auto out = in_1.add(in_2); 53 | std::cout << "Output vectors (in_1 + in_2): " << std::endl; 54 | std::cout << "out = " << out << std::endl; 55 | std::cout << std::endl; 56 | 57 | out = in_1 - in_2; 58 | std::cout << "Output vectors (in_1 - in_2): " << std::endl; 59 | std::cout << "out = " << out << std::endl; 60 | std::cout << std::endl; 61 | 62 | #ifndef MIPP_NO_INTRINSICS 63 | if (typeid(type) == typeid(int) || typeid(type) == typeid(short) || typeid(type) == typeid(signed char)) 64 | { 65 | out = in_1; 66 | out = ~out; 67 | std::cout << "Output vectors (~in_1): " << std::endl; 68 | std::cout << "out = " << out << std::endl; 69 | std::cout << std::endl; 70 | 71 | out = in_1 | in_2; 72 | std::cout << "Output vectors (in_1 | in_2): " << std::endl; 73 | std::cout << "out = " << out << std::endl; 74 | std::cout << std::endl; 75 | 76 | out = in_1 << 1; 77 | std::cout << "Output vectors (in_1 << 1): " << std::endl; 78 | std::cout << "out = " << out << std::endl; 79 | std::cout << std::endl; 80 | 81 | out = in_1 >> 1; 82 | std::cout << "Output vectors (in_1 >> 1): " << std::endl; 83 | std::cout << "out = " << out << std::endl; 84 | std::cout << std::endl; 85 | } 86 | #endif 87 | 88 | 
auto msk = (in_1 == in_2); 89 | std::cout << "Output vectors (in_1 == in_2): " << std::endl; 90 | std::cout << "msk = " << msk << std::endl; 91 | std::cout << std::endl; 92 | 93 | msk = (in_1 > in_2); 94 | std::cout << "Output vectors (in_1 > in_2): " << std::endl; 95 | std::cout << "msk = " << msk << std::endl; 96 | std::cout << std::endl; 97 | 98 | if (typeid(type) == typeid(double) || typeid(type) == typeid(float)) 99 | { 100 | msk = (in_1 != in_2); 101 | std::cout << "Output vectors (in_1 != in_2): " << std::endl; 102 | std::cout << "msk = " << msk << std::endl; 103 | std::cout << std::endl; 104 | 105 | msk = (in_1 < in_2); 106 | std::cout << "Output vectors (in_1 < in_2): " << std::endl; 107 | std::cout << "msk = " << msk << std::endl; 108 | std::cout << std::endl; 109 | 110 | msk = (in_1 <= in_2); 111 | std::cout << "Output vectors (in_1 <= in_2): " << std::endl; 112 | std::cout << "msk = " << msk << std::endl; 113 | std::cout << std::endl; 114 | 115 | msk = (in_1 >= in_2); 116 | std::cout << "Output vectors (in_1 >= in_2): " << std::endl; 117 | std::cout << "msk = " << msk << std::endl; 118 | std::cout << std::endl; 119 | 120 | out = in_1 * in_2; 121 | std::cout << "Output vectors (in_1 * in_2): " << std::endl; 122 | std::cout << "out = " << out << std::endl; 123 | std::cout << std::endl; 124 | 125 | out = in_1 / in_2; 126 | std::cout << "Output vectors (in_1 / in_2): " << std::endl; 127 | std::cout << "out = " << out << std::endl; 128 | std::cout << std::endl; 129 | 130 | auto two = mipp::Reg(2); 131 | out = in_1 + in_2 * two; 132 | std::cout << "Output vectors (in_1 + in_2 * two): " << std::endl; 133 | std::cout << "out = " << out << std::endl; 134 | std::cout << std::endl; 135 | } 136 | 137 | out = in_1.hmin(); 138 | std::cout << "Output vectors (in_1.hmin()): " << std::endl; 139 | std::cout << "out = " << out << std::endl; 140 | std::cout << std::endl; 141 | 142 | out = in_1 * 3 + 2; 143 | std::cout << "Output vectors (in_1 * 3 + 2): " << std::endl; 144 
| std::cout << "out = " << out << std::endl; 145 | std::cout << std::endl; 146 | 147 | return 0; 148 | } 149 | -------------------------------------------------------------------------------- /examples/sort/Quick_sorter.hpp: -------------------------------------------------------------------------------- 1 | #ifndef QUICK_SORTER_HPP 2 | #define QUICK_SORTER_HPP 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | 11 | template 12 | class Quick_sorter 13 | { 14 | private: 15 | const int size; 16 | mipp::vector R; 17 | mipp::vector K; 18 | public: 19 | Quick_sorter(const int size) : size(size), R(size +2), K(size + 2) 20 | { 21 | std::iota(R.begin() +1 , R.begin() + this->size +1 , 0); 22 | 23 | K[0] = std::numeric_limits::min(); 24 | K[size +1] = std::numeric_limits::max(); 25 | } 26 | 27 | void sort(const T* values, std::vector &pos, const int p_sort = -1) 28 | { 29 | const auto M = (p_sort <= 0) ? (int)pos.size() : p_sort; 30 | std::copy(values, values + size, K.begin() +1); 31 | std::iota(R.begin(), R.begin() + this->size +2, 0); 32 | // std::cout << "M: " << M << std::endl; 33 | 34 | // Q1 35 | auto l = 1; 36 | auto r = size; 37 | auto j = r +1; 38 | do 39 | { 40 | auto i = l; 41 | j = r +1; 42 | auto KK = K[R[l]]; 43 | 44 | while (j > i) 45 | { 46 | do 47 | { 48 | i++; 49 | } 50 | while(K[R[i]] < KK); 51 | 52 | do 53 | { 54 | j--; 55 | } 56 | while(KK < K[R[j]]); 57 | 58 | if(j > i) 59 | { 60 | const auto tmp = R[j]; 61 | R[j] = R[i]; 62 | R[i] = tmp; 63 | } 64 | else 65 | { 66 | const auto tmp = R[l]; 67 | R[l] = R[j]; 68 | R[j] = tmp; 69 | 70 | if(j < M +1) 71 | l = j +1; 72 | else 73 | r = j -1; 74 | } 75 | } 76 | } while (j != M +1); 77 | for (auto i = 0; i < M; i++) 78 | pos[i] = R[i +1] -1; 79 | } 80 | }; 81 | 82 | #endif /* QUICK_SORTER_HPP */ 83 | -------------------------------------------------------------------------------- /examples/sorting.cpp: -------------------------------------------------------------------------------- 
1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "sort/LC_sorter.hpp" 10 | #include "sort/LC_sorter_simd.hpp" 11 | #include "sort/Quick_sorter.hpp" 12 | 13 | using type = float; 14 | 15 | int main(int argc, char** argv) 16 | { 17 | // ------------------------------------------------------------------------ 18 | std::cout << "MIPP example" << std::endl; 19 | std::cout << "------------" << std::endl; 20 | std::cout << " - Instr. type: " << mipp::InstructionType << std::endl; 21 | std::cout << " - Instr. full type: " << mipp::InstructionFullType << std::endl; 22 | std::cout << " - Instr. version: " << mipp::InstructionVersion << std::endl; 23 | std::cout << " - Reg. size: " << mipp::RegisterSizeBit << " bits" << std::endl; 24 | std::cout << " - Reg. lanes: " << mipp::Lanes << std::endl; 25 | std::cout << " - 64-bit support: " << (mipp::Support64Bit ? "yes" : "no") << std::endl; 26 | std::cout << " - Byte/word support: " << (mipp::SupportByteWord ? "yes" : "no") << std::endl; 27 | auto ext = mipp::InstructionExtensions(); 28 | if (ext.size() > 0) 29 | { 30 | std::cout << " - Instr. extensions: {"; 31 | for (auto i = 0; i < (int)ext.size(); i++) 32 | std::cout << ext[i] << (i < ((int)ext.size() -1) ? ", " : ""); 33 | std::cout << "}" << std::endl; 34 | } 35 | std::cout << std::endl; 36 | // ------------------------------------------------------------------------ 37 | 38 | const auto n_elmts = (argc > 1) ? std::atoi(argv[1]) : 512; 39 | const auto k = (argc > 2) ? std::atoi(argv[2]) : 2; 40 | const auto n_tests = (argc > 3) ? 
std::atoi(argv[3]) : 1000; 41 | 42 | std::cout << "n_elmts = " << n_elmts << std::endl; 43 | std::cout << "k = " << k << std::endl; 44 | std::cout << "n_tests = " << n_tests << std::endl << std::endl; 45 | 46 | std::random_device rd; 47 | std::mt19937 g(rd()); 48 | g.seed(123); 49 | 50 | std::vector> values(n_tests, std::vector(n_elmts)); 51 | for (auto i = 0; i < n_tests; i++) 52 | { 53 | std::iota (values[i].begin(), values[i].end(), 0); 54 | std::shuffle(values[i].begin(), values[i].end(), g); 55 | } 56 | 57 | // ------------------------------------------------------------------------- 58 | 59 | std::cout << "values: "; 60 | for (auto i = 0; i < n_elmts; i++) 61 | std::cout << values[n_tests -1][i] << ","; 62 | std::cout << std::endl; 63 | 64 | std::cout << std::endl << "Lewis Carroll:" << std::endl; 65 | std::vector pos(k, -1); 66 | LC_sorter lc(n_elmts); 67 | std::fill(pos.begin(), pos.end(), -1); 68 | auto t_before = std::chrono::steady_clock::now(); 69 | auto csum = 0; 70 | for (auto i = 0; i < n_tests; i++) 71 | { 72 | lc.partial_sort(values[i].data(), pos, n_elmts); 73 | csum += pos[k -1]; 74 | } 75 | auto t_after = std::chrono::steady_clock::now(); 76 | auto d_delta = t_after - t_before; 77 | auto decod_time_ms = (float)d_delta.count() * 0.000001f; 78 | 79 | std::cout << "min pos: "; for (auto i = 0; i < k; i++) std::cout << pos[i] << ","; std::cout << std::endl; 80 | std::cout << "min: "; for (auto i = 0; i < k; i++) std::cout << values[n_tests -1][pos[i]] << ","; std::cout << std::endl; 81 | std::cout << "time: " << decod_time_ms << "ms" << std::endl; 82 | std::cout << "csum: " << csum << std::endl; 83 | 84 | std::cout << std::endl << "Lewis Carroll SIMD:" << std::endl; 85 | LC_sorter_simd lc_simd(n_elmts); 86 | std::fill(pos.begin(), pos.end(), -1); 87 | t_before = std::chrono::steady_clock::now(); 88 | csum = 0; 89 | for (auto i = 0; i < n_tests; i++) 90 | { 91 | lc_simd.partial_sort(values[i].data(), pos); 92 | csum += pos[k -1]; 93 | } 94 | t_after 
= std::chrono::steady_clock::now(); 95 | d_delta = t_after - t_before; 96 | decod_time_ms = (float)d_delta.count() * 0.000001f; 97 | 98 | std::cout << "min pos: "; for (auto i = 0; i < k; i++) std::cout << pos[i] << ","; std::cout << std::endl; 99 | std::cout << "min: "; for (auto i = 0; i < k; i++) std::cout << values[n_tests -1][pos[i]] << ","; std::cout << std::endl; 100 | std::cout << "time: " << decod_time_ms << "ms" << std::endl; 101 | std::cout << "csum: " << csum << std::endl; 102 | 103 | std::cout << std::endl << "Partial Quicksort:" << std::endl; 104 | Quick_sorter qs(n_elmts); 105 | std::fill(pos.begin(), pos.end(), -1); 106 | t_before = std::chrono::steady_clock::now(); 107 | csum = 0; 108 | for (auto i = 0; i < n_tests; i++) 109 | { 110 | qs.sort(values[i].data(), pos); 111 | csum += pos[k -1]; 112 | } 113 | t_after = std::chrono::steady_clock::now(); 114 | d_delta = t_after - t_before; 115 | decod_time_ms = (float)d_delta.count() * 0.000001f; 116 | 117 | std::cout << "min pos: "; for (auto i = 0; i < k; i++) std::cout << pos[i] << ","; std::cout << std::endl; 118 | std::cout << "min: "; for (auto i = 0; i < k; i++) std::cout << values[n_tests -1][pos[i]] << ","; std::cout << std::endl; 119 | std::cout << "time: " << decod_time_ms << "ms" << std::endl; 120 | std::cout << "csum: " << csum << std::endl; 121 | 122 | // std::cout << std::endl << "C++11 partial sort:" << std::endl; 123 | // mipp::vector indices(n_elmts); 124 | // std::iota(indices.begin(), indices.end(), 0); 125 | // t_before = std::chrono::steady_clock::now(); 126 | // csum = 0; 127 | // for (auto i = 0; i < n_tests; i++) 128 | // { 129 | // std::partial_sort(indices.begin(), indices.begin() +k, indices.end(), 130 | // [values, i](int x, int y) { 131 | // return values[i][x] < values[i][y]; 132 | // }); 133 | // csum += indices[k -1]; 134 | // } 135 | 136 | // t_after = std::chrono::steady_clock::now(); 137 | // d_delta = t_after - t_before; 138 | // decod_time_ms = (float)d_delta.count() 
* 0.000001f; 139 | 140 | // std::cout << "min pos: "; for (auto i = 0; i < k; i++) std::cout << indices[i] << ","; std::cout << std::endl; 141 | // std::cout << "min: "; for (auto i = 0; i < k; i++) std::cout << values[n_tests -1][indices[i]] << ","; std::cout << std::endl; 142 | // std::cout << "time: " << decod_time_ms << "ms" << std::endl; 143 | // std::cout << "csum: " << csum << std::endl; 144 | 145 | return 0; 146 | } 147 | -------------------------------------------------------------------------------- /include/math/avx512_mathfun.h: -------------------------------------------------------------------------------- 1 | /* 2 | AVX512 implementation of sin, cos, sincos, exp and log 3 | 4 | Based on "sse_mathfun.h", by Julien Pommier 5 | http://gruntthepeon.free.fr/ssemath/ 6 | 7 | Copyright (C) 2017 Adrien Cassagne 8 | MIT license 9 | */ 10 | #ifdef __AVX512F__ 11 | #ifndef AVX512_MATHFUN_H_ 12 | #define AVX512_MATHFUN_H_ 13 | 14 | #include 15 | 16 | typedef __m512 v16sf; // vector of 16 float (avx512) 17 | 18 | // prototypes 19 | inline v16sf log512_ps(v16sf x); 20 | inline v16sf exp512_ps(v16sf x); 21 | inline v16sf sin512_ps(v16sf x); 22 | inline v16sf cos512_ps(v16sf x); 23 | inline void sincos512_ps(v16sf x, v16sf *s, v16sf *c); 24 | 25 | #include "avx512_mathfun.hxx" 26 | 27 | #endif 28 | #endif 29 | -------------------------------------------------------------------------------- /include/math/avx_mathfun.h: -------------------------------------------------------------------------------- 1 | /* 2 | AVX implementation of sin, cos, sincos, exp and log 3 | 4 | Based on "sse_mathfun.h", by Julien Pommier 5 | http://gruntthepeon.free.fr/ssemath/ 6 | 7 | Copyright (C) 2012 Giovanni Garberoglio 8 | Interdisciplinary Laboratory for Computational Science (LISC) 9 | Fondazione Bruno Kessler and University of Trento 10 | via Sommarive, 18 11 | I-38123 Trento (Italy) 12 | 13 | This software is provided 'as-is', without any express or implied 14 | warranty. 
In no event will the authors be held liable for any damages 15 | arising from the use of this software. 16 | 17 | Permission is granted to anyone to use this software for any purpose, 18 | including commercial applications, and to alter it and redistribute it 19 | freely, subject to the following restrictions: 20 | 21 | 1. The origin of this software must not be misrepresented; you must not 22 | claim that you wrote the original software. If you use this software 23 | in a product, an acknowledgment in the product documentation would be 24 | appreciated but is not required. 25 | 2. Altered source versions must be plainly marked as such, and must not be 26 | misrepresented as being the original software. 27 | 3. This notice may not be removed or altered from any source distribution. 28 | 29 | (this is the zlib license) 30 | */ 31 | #ifdef __AVX__ 32 | #ifndef AVX_MATHFUN_H_ 33 | #define AVX_MATHFUN_H_ 34 | 35 | #include 36 | 37 | typedef __m256 v8sf; // vector of 8 float (avx) 38 | 39 | // prototypes 40 | inline v8sf log256_ps(v8sf x); 41 | inline v8sf exp256_ps(v8sf x); 42 | inline v8sf sin256_ps(v8sf x); 43 | inline v8sf cos256_ps(v8sf x); 44 | inline void sincos256_ps(v8sf x, v8sf *s, v8sf *c); 45 | 46 | #include "avx_mathfun.hxx" 47 | 48 | #endif 49 | #endif -------------------------------------------------------------------------------- /include/math/neon_mathfun.h: -------------------------------------------------------------------------------- 1 | /* NEON implementation of sin, cos, exp and log 2 | 3 | Inspired by Intel Approximate Math library, and based on the 4 | corresponding algorithms of the cephes math library 5 | */ 6 | 7 | /* Copyright (C) 2011 Julien Pommier 8 | 9 | This software is provided 'as-is', without any express or implied 10 | warranty. In no event will the authors be held liable for any damages 11 | arising from the use of this software. 
12 | 13 | Permission is granted to anyone to use this software for any purpose, 14 | including commercial applications, and to alter it and redistribute it 15 | freely, subject to the following restrictions: 16 | 17 | 1. The origin of this software must not be misrepresented; you must not 18 | claim that you wrote the original software. If you use this software 19 | in a product, an acknowledgment in the product documentation would be 20 | appreciated but is not required. 21 | 2. Altered source versions must be plainly marked as such, and must not be 22 | misrepresented as being the original software. 23 | 3. This notice may not be removed or altered from any source distribution. 24 | 25 | (this is the zlib license) 26 | */ 27 | 28 | #if defined(__ARM_NEON__) || defined(__ARM_NEON) 29 | #ifndef NEON_MATHFUN_H_ 30 | #define NEON_MATHFUN_H_ 31 | 32 | #include 33 | 34 | typedef float32x4_t v4sf; // vector of 4 float 35 | 36 | // prototypes 37 | inline v4sf log_ps(v4sf x); 38 | inline v4sf exp_ps(v4sf x); 39 | inline v4sf sin_ps(v4sf x); 40 | inline v4sf cos_ps(v4sf x); 41 | inline void sincos_ps(v4sf x, v4sf *s, v4sf *c); 42 | 43 | #include "neon_mathfun.hxx" 44 | 45 | #endif 46 | #endif -------------------------------------------------------------------------------- /include/math/sse_mathfun.h: -------------------------------------------------------------------------------- 1 | /* SIMD (SSE1+MMX or SSE2) implementation of sin, cos, exp and log 2 | 3 | Inspired by Intel Approximate Math library, and based on the 4 | corresponding algorithms of the cephes math library 5 | 6 | The default is to use the SSE1 version. If you define USE_SSE2, the 7 | SSE2 intrinsics will be used in place of the MMX intrinsics. Do 8 | not expect any significant performance improvement with SSE2. 9 | */ 10 | 11 | /* Copyright (C) 2007 Julien Pommier 12 | 13 | This software is provided 'as-is', without any express or implied 14 | warranty. 
In no event will the authors be held liable for any damages 15 | arising from the use of this software. 16 | 17 | Permission is granted to anyone to use this software for any purpose, 18 | including commercial applications, and to alter it and redistribute it 19 | freely, subject to the following restrictions: 20 | 21 | 1. The origin of this software must not be misrepresented; you must not 22 | claim that you wrote the original software. If you use this software 23 | in a product, an acknowledgment in the product documentation would be 24 | appreciated but is not required. 25 | 2. Altered source versions must be plainly marked as such, and must not be 26 | misrepresented as being the original software. 27 | 3. This notice may not be removed or altered from any source distribution. 28 | 29 | (this is the zlib license) 30 | */ 31 | 32 | #ifdef __SSE__ 33 | #ifndef SSE_MATHFUN_H_ 34 | #define SSE_MATHFUN_H_ 35 | 36 | #include 37 | 38 | typedef __m128 v4sf; // vector of 4 float (sse1) 39 | 40 | // prototypes 41 | inline v4sf log_ps(v4sf x); 42 | inline v4sf exp_ps(v4sf x); 43 | inline v4sf sin_ps(v4sf x); 44 | inline v4sf cos_ps(v4sf x); 45 | inline void sincos_ps(v4sf x, v4sf *s, v4sf *c); 46 | 47 | #include "sse_mathfun.hxx" 48 | 49 | #endif 50 | #endif -------------------------------------------------------------------------------- /include/mipp_scalar_op.h: -------------------------------------------------------------------------------- 1 | #ifndef MIPP_SCALAR_OP_H_ 2 | #define MIPP_SCALAR_OP_H_ 3 | 4 | namespace mipp_scop // My Intrinsics Plus Plus SCalar OPerations 5 | { 6 | template 7 | inline T add(const T val1, const T val2); 8 | 9 | template 10 | inline T sub(const T val1, const T val2); 11 | 12 | template 13 | inline T andb(const T val1, const T val2); 14 | 15 | template 16 | inline T xorb(const T val1, const T val2); 17 | 18 | template 19 | inline T msb(const T val); 20 | 21 | template 22 | inline T div2(const T val); 23 | 24 | template 25 | inline T 
div4(const T val); 26 | 27 | template 28 | inline T rshift(const T val, const int n); 29 | 30 | template 31 | inline T lshift(const T val, const int n); 32 | 33 | template 34 | struct All_one_bits { 35 | static inline T make(); 36 | }; 37 | } 38 | 39 | #include "mipp_scalar_op.hxx" 40 | 41 | #endif /* MIPP_SCALAR_OP_H_ */ 42 | -------------------------------------------------------------------------------- /mipp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aff3ct/MIPP/75fc843637988f93ebea9a23d3f6f018c37a1970/mipp.jpg -------------------------------------------------------------------------------- /tests/lib/Catch2/README.md: -------------------------------------------------------------------------------- 1 | Catch2 (v2.13.10) from [GitHub](https://github.com/catchorg/Catch2). -------------------------------------------------------------------------------- /tests/src/arithmetic_operations/cdiv.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_cdiv() 11 | { 12 | T inputs1[2*mipp::N()]; 13 | T inputs2[2*mipp::N()]; 14 | 15 | std::iota(inputs1, inputs1 + 2*mipp::N(), (T)1); 16 | std::iota(inputs2, inputs2 + 2*mipp::N(), (T)1); 17 | 18 | std::mt19937 g; 19 | std::shuffle(inputs1, inputs1 + 2*mipp::N(), g); 20 | std::shuffle(inputs2, inputs2 + 2*mipp::N(), g); 21 | 22 | mipp::regx2 r1; 23 | mipp::regx2 r2; 24 | 25 | r1.val[0] = mipp::load(inputs1); 26 | r1.val[1] = mipp::load(inputs1 + mipp::N()); 27 | r2.val[0] = mipp::load(inputs2); 28 | r2.val[1] = mipp::load(inputs2 + mipp::N()); 29 | 30 | mipp::regx2 r3 = mipp::cdiv(r1, r2); 31 | 32 | for (auto i = 0; i < mipp::N(); i++) 33 | { 34 | T norm = inputs2[i] * inputs2[i] + inputs2[mipp::N() +i] * inputs2[mipp::N() +i]; 35 | 36 | T res_re = (inputs1[i ] * inputs2[i] + inputs1[mipp::N() +i] 
* inputs2[mipp::N() +i]) / norm; 37 | T res_im = (inputs1[mipp::N() +i] * inputs2[i] - inputs1[ i] * inputs2[mipp::N() +i]) / norm; 38 | 39 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1 40 | REQUIRE(mipp::get(r3.val[0], i) == Approx(res_re).epsilon(0.01)); 41 | REQUIRE(mipp::get(r3.val[1], i) == Approx(res_im).epsilon(0.01)); 42 | #else 43 | REQUIRE(mipp::get(r3.val[0], i) == res_re); 44 | REQUIRE(mipp::get(r3.val[1], i) == res_im); 45 | #endif 46 | } 47 | } 48 | 49 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 50 | TEST_CASE("Complex division - mipp::reg", "[mipp::cdiv]") 51 | { 52 | #if defined(MIPP_64BIT) 53 | SECTION("datatype = double") { test_reg_cdiv(); } 54 | #endif 55 | SECTION("datatype = float") { test_reg_cdiv(); } 56 | } 57 | #endif 58 | 59 | template 60 | void test_Reg_cdiv() 61 | { 62 | T inputs1[2*mipp::N()]; 63 | T inputs2[2*mipp::N()]; 64 | 65 | std::iota(inputs1, inputs1 + 2*mipp::N(), (T)1); 66 | std::iota(inputs2, inputs2 + 2*mipp::N(), (T)1); 67 | 68 | std::mt19937 g; 69 | std::shuffle(inputs1, inputs1 + 2*mipp::N(), g); 70 | std::shuffle(inputs2, inputs2 + 2*mipp::N(), g); 71 | 72 | mipp::Regx2 r1 = inputs1; 73 | mipp::Regx2 r2 = inputs2; 74 | mipp::Regx2 r3 = r1 / r2; 75 | 76 | for (auto i = 0; i < mipp::N(); i++) 77 | { 78 | T norm = inputs2[i] * inputs2[i] + inputs2[mipp::N() +i] * inputs2[mipp::N() +i]; 79 | 80 | T res_re = (inputs1[i ] * inputs2[i] + inputs1[mipp::N() +i] * inputs2[mipp::N() +i]) / norm; 81 | T res_im = (inputs1[mipp::N() +i] * inputs2[i] - inputs1[ i] * inputs2[mipp::N() +i]) / norm; 82 | 83 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1 84 | REQUIRE(r3[0][i] == Approx(res_re).epsilon(0.01)); 85 | REQUIRE(r3[1][i] == Approx(res_im).epsilon(0.01)); 86 | #else 87 | REQUIRE(r3[0][i] == res_re); 88 | REQUIRE(r3[1][i] == res_im); 89 | #endif 90 | } 91 | } 92 | 93 | #if !defined(MIPP_SVE_LS) 94 | TEST_CASE("Complex division - mipp::Reg", "[mipp::cdiv]") 95 | { 96 | #if defined(MIPP_64BIT) 97 | SECTION("datatype 
= double") { test_Reg_cdiv(); } 98 | #endif 99 | SECTION("datatype = float") { test_Reg_cdiv(); } 100 | } 101 | #endif 102 | -------------------------------------------------------------------------------- /tests/src/arithmetic_operations/cmul.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_cmul() 11 | { 12 | T inputs1[2*mipp::N()]; 13 | T inputs2[2*mipp::N()]; 14 | 15 | std::iota(inputs1, inputs1 + 2*mipp::N(), (T)1); 16 | std::iota(inputs2, inputs2 + 2*mipp::N(), (T)1); 17 | 18 | std::mt19937 g; 19 | std::shuffle(inputs1, inputs1 + 2*mipp::N(), g); 20 | std::shuffle(inputs2, inputs2 + 2*mipp::N(), g); 21 | 22 | mipp::regx2 r1; 23 | mipp::regx2 r2; 24 | 25 | r1.val[0] = mipp::load(inputs1); 26 | r1.val[1] = mipp::load(inputs1 + mipp::N()); 27 | r2.val[0] = mipp::load(inputs2); 28 | r2.val[1] = mipp::load(inputs2 + mipp::N()); 29 | 30 | mipp::regx2 r3 = mipp::cmul(r1, r2); 31 | 32 | for (auto i = 0; i < mipp::N(); i++) 33 | { 34 | T res_re = inputs1[i] * inputs2[ i] - inputs1[mipp::N() +i] * inputs2[mipp::N() +i]; 35 | T res_im = inputs1[i] * inputs2[mipp::N() +i] + inputs1[mipp::N() +i] * inputs2[ i]; 36 | 37 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1 38 | REQUIRE(mipp::get(r3.val[0], i) == Approx(res_re)); 39 | REQUIRE(mipp::get(r3.val[1], i) == Approx(res_im)); 40 | #else 41 | REQUIRE(mipp::get(r3.val[0], i) == res_re); 42 | REQUIRE(mipp::get(r3.val[1], i) == res_im); 43 | #endif 44 | } 45 | } 46 | 47 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 48 | TEST_CASE("Complex multiplication - mipp::reg", "[mipp::cmul]") 49 | { 50 | #if defined(MIPP_64BIT) 51 | SECTION("datatype = double") { test_reg_cmul(); } 52 | #endif 53 | SECTION("datatype = float") { test_reg_cmul(); } 54 | 55 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 56 | #if !defined(MIPP_SSE) || 
(defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 57 | SECTION("datatype = int32_t") { test_reg_cmul(); } 58 | #endif 59 | #if defined(MIPP_BW) 60 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 2) 61 | SECTION("datatype = int16_t") { test_reg_cmul(); } 62 | #endif 63 | #endif 64 | // #if defined(MIPP_NEON) 65 | // SECTION("datatype = int8_t") { test_reg_cmul(); } 66 | // #endif 67 | #endif 68 | } 69 | #endif 70 | 71 | template 72 | void test_Reg_cmul() 73 | { 74 | T inputs1[2*mipp::N()]; 75 | T inputs2[2*mipp::N()]; 76 | 77 | std::iota(inputs1, inputs1 + 2*mipp::N(), (T)1); 78 | std::iota(inputs2, inputs2 + 2*mipp::N(), (T)1); 79 | 80 | std::mt19937 g; 81 | std::shuffle(inputs1, inputs1 + 2*mipp::N(), g); 82 | std::shuffle(inputs2, inputs2 + 2*mipp::N(), g); 83 | 84 | mipp::Regx2 r1 = inputs1; 85 | mipp::Regx2 r2 = inputs2; 86 | mipp::Regx2 r3 = r1 * r2; 87 | 88 | for (auto i = 0; i < mipp::N(); i++) 89 | { 90 | T res_re = inputs1[i] * inputs2[ i] - inputs1[mipp::N() +i] * inputs2[mipp::N() +i]; 91 | T res_im = inputs1[i] * inputs2[mipp::N() +i] + inputs1[mipp::N() +i] * inputs2[ i]; 92 | 93 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1 94 | REQUIRE(r3[0][i] == Approx(res_re)); 95 | REQUIRE(r3[1][i] == Approx(res_im)); 96 | #else 97 | REQUIRE(r3[0][i] == res_re); 98 | REQUIRE(r3[1][i] == res_im); 99 | #endif 100 | } 101 | } 102 | 103 | #if !defined(MIPP_SVE_LS) 104 | TEST_CASE("Complex multiplication - mipp::Reg", "[mipp::cmul]") 105 | { 106 | #if defined(MIPP_64BIT) 107 | SECTION("datatype = double") { test_Reg_cmul(); } 108 | #endif 109 | SECTION("datatype = float") { test_Reg_cmul(); } 110 | 111 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 112 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 113 | SECTION("datatype = int32_t") { test_Reg_cmul(); } 114 | #endif 115 | #if defined(MIPP_BW) 116 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 2) 117 | 
SECTION("datatype = int16_t") { test_Reg_cmul(); } 118 | #endif 119 | #endif 120 | // #if defined(MIPP_NEON) 121 | // SECTION("datatype = int8_t") { test_Reg_cmul(); } 122 | // #endif 123 | #endif 124 | } 125 | #endif 126 | -------------------------------------------------------------------------------- /tests/src/arithmetic_operations/cmulconj.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_cmulconj() 11 | { 12 | T inputs1[2*mipp::N()]; 13 | T inputs2[2*mipp::N()]; 14 | 15 | std::iota(inputs1, inputs1 + 2*mipp::N(), (T)1); 16 | std::iota(inputs2, inputs2 + 2*mipp::N(), (T)1); 17 | 18 | std::mt19937 g; 19 | std::shuffle(inputs1, inputs1 + 2*mipp::N(), g); 20 | std::shuffle(inputs2, inputs2 + 2*mipp::N(), g); 21 | 22 | mipp::regx2 r1; 23 | mipp::regx2 r2; 24 | 25 | r1.val[0] = mipp::load(inputs1); 26 | r1.val[1] = mipp::load(inputs1 + mipp::N()); 27 | r2.val[0] = mipp::load(inputs2); 28 | r2.val[1] = mipp::load(inputs2 + mipp::N()); 29 | 30 | mipp::regx2 r3 = mipp::cmulconj(r1, r2); 31 | 32 | for (auto i = 0; i < mipp::N(); i++) 33 | { 34 | T res_re = inputs1[ i] * inputs2[i] + inputs1[mipp::N() +i] * inputs2[mipp::N() +i]; 35 | T res_im = inputs1[mipp::N() +i] * inputs2[i] - inputs1[ i] * inputs2[mipp::N() +i]; 36 | 37 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1 38 | REQUIRE(mipp::get(r3.val[0], i) == Approx(res_re)); 39 | REQUIRE(mipp::get(r3.val[1], i) == Approx(res_im)); 40 | #else 41 | REQUIRE(mipp::get(r3.val[0], i) == res_re); 42 | REQUIRE(mipp::get(r3.val[1], i) == res_im); 43 | #endif 44 | } 45 | } 46 | 47 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 48 | TEST_CASE("Complex multiplication conjugate - mipp::reg", "[mipp::cmulconj]") 49 | { 50 | #if defined(MIPP_64BIT) 51 | SECTION("datatype = double") { test_reg_cmulconj(); } 52 | #endif 53 | SECTION("datatype = float") { 
test_reg_cmulconj(); } 54 | 55 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 56 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 57 | SECTION("datatype = int32_t") { test_reg_cmulconj(); } 58 | #endif 59 | #if defined(MIPP_BW) 60 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 2) 61 | SECTION("datatype = int16_t") { test_reg_cmulconj(); } 62 | #endif 63 | #endif 64 | // #if defined(MIPP_NEON) 65 | // SECTION("datatype = int8_t") { test_reg_cmulconj(); } 66 | // #endif 67 | #endif 68 | } 69 | #endif 70 | 71 | template 72 | void test_Reg_cmulconj() 73 | { 74 | T inputs1[2*mipp::N()]; 75 | T inputs2[2*mipp::N()]; 76 | 77 | std::iota(inputs1, inputs1 + 2*mipp::N(), (T)1); 78 | std::iota(inputs2, inputs2 + 2*mipp::N(), (T)1); 79 | 80 | std::mt19937 g; 81 | std::shuffle(inputs1, inputs1 + 2*mipp::N(), g); 82 | std::shuffle(inputs2, inputs2 + 2*mipp::N(), g); 83 | 84 | mipp::Regx2 r1 = inputs1; 85 | mipp::Regx2 r2 = inputs2; 86 | mipp::Regx2 r3 = mipp::cmulconj(r1, r2); 87 | 88 | for (auto i = 0; i < mipp::N(); i++) 89 | { 90 | T res_re = inputs1[ i] * inputs2[i] + inputs1[mipp::N() +i] * inputs2[mipp::N() +i]; 91 | T res_im = inputs1[mipp::N() +i] * inputs2[i] - inputs1[ i] * inputs2[mipp::N() +i]; 92 | 93 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1 94 | REQUIRE(r3[0][i] == Approx(res_re)); 95 | REQUIRE(r3[1][i] == Approx(res_im)); 96 | #else 97 | REQUIRE(r3[0][i] == res_re); 98 | REQUIRE(r3[1][i] == res_im); 99 | #endif 100 | } 101 | } 102 | 103 | #if !defined(MIPP_SVE_LS) 104 | TEST_CASE("Complex multiplication conjugate - mipp::Reg", "[mipp::cmulconj]") 105 | { 106 | #if defined(MIPP_64BIT) 107 | SECTION("datatype = double") { test_Reg_cmulconj(); } 108 | #endif 109 | SECTION("datatype = float") { test_Reg_cmulconj(); } 110 | 111 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 112 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 
113 | SECTION("datatype = int32_t") { test_Reg_cmulconj(); } 114 | #endif 115 | #if defined(MIPP_BW) 116 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 2) 117 | SECTION("datatype = int16_t") { test_Reg_cmulconj(); } 118 | #endif 119 | #endif 120 | // #if defined(MIPP_NEON) 121 | // SECTION("datatype = int8_t") { test_Reg_cmulconj(); } 122 | // #endif 123 | #endif 124 | } 125 | #endif 126 | -------------------------------------------------------------------------------- /tests/src/arithmetic_operations/conj.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_conj() 11 | { 12 | T inputs1[2*mipp::N()]; 13 | 14 | std::iota(inputs1, inputs1 + 2*mipp::N(), (T)0); 15 | 16 | std::mt19937 g; 17 | std::shuffle(inputs1, inputs1 + 2*mipp::N(), g); 18 | 19 | mipp::regx2 r1; 20 | 21 | r1.val[0] = mipp::load(inputs1); 22 | r1.val[1] = mipp::load(inputs1 + mipp::N()); 23 | 24 | mipp::regx2 r2 = mipp::conj(r1); 25 | 26 | for (auto i = 0; i < mipp::N(); i++) 27 | { 28 | T res_re = inputs1[ i]; 29 | T res_im = -inputs1[mipp::N() +i]; 30 | 31 | REQUIRE(mipp::get(r2.val[0], i) == res_re); 32 | REQUIRE(mipp::get(r2.val[1], i) == res_im); 33 | } 34 | } 35 | 36 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 37 | TEST_CASE("Complex conjugate - mipp::reg", "[mipp::conj]") 38 | { 39 | #if defined(MIPP_64BIT) 40 | SECTION("datatype = double") { test_reg_conj(); } 41 | #endif 42 | SECTION("datatype = float") { test_reg_conj(); } 43 | 44 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 45 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 2) 46 | SECTION("datatype = int32_t") { test_reg_conj(); } 47 | #endif 48 | #endif 49 | #if defined(MIPP_BW) 50 | SECTION("datatype = int16_t") { test_reg_conj(); } 51 | SECTION("datatype = int8_t") { test_reg_conj(); } 52 | 
#endif 53 | } 54 | #endif 55 | 56 | template 57 | void test_Reg_conj() 58 | { 59 | T inputs1[2*mipp::N()]; 60 | 61 | std::iota(inputs1, inputs1 + 2*mipp::N(), (T)0); 62 | 63 | std::mt19937 g; 64 | std::shuffle(inputs1, inputs1 + 2*mipp::N(), g); 65 | 66 | mipp::Regx2 r1 = inputs1; 67 | mipp::Regx2 r2 = mipp::conj(r1); 68 | 69 | for (auto i = 0; i < mipp::N(); i++) 70 | { 71 | T res_re = inputs1[ i]; 72 | T res_im = -inputs1[mipp::N() +i]; 73 | 74 | REQUIRE(r2[0][i] == res_re); 75 | REQUIRE(r2[1][i] == res_im); 76 | } 77 | } 78 | 79 | #if !defined(MIPP_SVE_LS) 80 | TEST_CASE("Complex conjugate - mipp::Reg", "[mipp::conj]") 81 | { 82 | #if defined(MIPP_64BIT) 83 | SECTION("datatype = double") { test_Reg_conj(); } 84 | #endif 85 | SECTION("datatype = float") { test_Reg_conj(); } 86 | 87 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 88 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 2) 89 | SECTION("datatype = int32_t") { test_Reg_conj(); } 90 | #endif 91 | #endif 92 | #if defined(MIPP_BW) 93 | SECTION("datatype = int16_t") { test_Reg_conj(); } 94 | SECTION("datatype = int8_t") { test_Reg_conj(); } 95 | #endif 96 | } 97 | #endif 98 | -------------------------------------------------------------------------------- /tests/src/arithmetic_operations/cvt.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_cvt() 11 | { 12 | constexpr int N1 = mipp::N(); 13 | constexpr int N2 = mipp::N(); 14 | 15 | T1 inputs1[N1]; 16 | std::iota(inputs1, inputs1 + N1, (T1)-N1/2); 17 | 18 | for (auto i = 0; i < N1; i++) 19 | inputs1[i] += i % 2 ? (T1)0.4 : (T1)0.6; 20 | 21 | mipp::reg r1 = mipp::load(inputs1); 22 | mipp::reg r2 = N1 != N2 ? 
mipp::cvt(mipp::low(r1)) : mipp::cvt(r1); 23 | 24 | for (auto i = 0; i < N2; i++) 25 | { 26 | auto res = static_cast(std::round(inputs1[i])); 27 | REQUIRE(mipp::get(r2, i) == res); 28 | } 29 | } 30 | 31 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 32 | TEST_CASE("Convert - mipp::reg", "[mipp::cvt]") 33 | { 34 | #if defined(MIPP_64BIT) 35 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(__AVX512DQ__)) 36 | #if !defined(MIPP_SSE) && !defined(MIPP_AVX) 37 | SECTION("datatype = int64_t -> double") { test_reg_cvt(); } 38 | SECTION("datatype = double -> int64_t") { test_reg_cvt(); } 39 | #endif 40 | #endif 41 | #endif 42 | SECTION("datatype = int32_t -> float") { test_reg_cvt(); } 43 | SECTION("datatype = float -> int32_t") { test_reg_cvt(); } 44 | 45 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 46 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 47 | #if defined(MIPP_64BIT) 48 | SECTION("datatype = int32_t -> int64_t") { test_reg_cvt(); } 49 | #endif 50 | #if defined(MIPP_BW) 51 | SECTION("datatype = int16_t -> int32_t") { test_reg_cvt(); } 52 | SECTION("datatype = int8_t -> int16_t") { test_reg_cvt(); } 53 | #endif 54 | #endif 55 | #endif 56 | } 57 | #endif 58 | 59 | template 60 | void test_Reg_cvt() 61 | { 62 | constexpr int N1 = mipp::N(); 63 | constexpr int N2 = mipp::N(); 64 | 65 | T1 inputs1[N1]; 66 | std::iota(inputs1, inputs1 + N1, (T1)-N1/2); 67 | 68 | for (auto i = 0; i < N1; i++) 69 | inputs1[i] += i % 2 ? (T1)0.4 : (T1)0.6; 70 | 71 | mipp::Reg r1 = inputs1; 72 | mipp::Reg r2 = N1 != N2 ? 
mipp::cvt(r1.low()) : mipp::cvt(r1); 73 | 74 | for (auto i = 0; i < N2; i++) 75 | { 76 | auto res = static_cast(std::round(inputs1[i])); 77 | REQUIRE(r2[i] == res); 78 | } 79 | } 80 | 81 | #if !defined(MIPP_SVE_LS) 82 | TEST_CASE("Convert - mipp::Reg", "[mipp::cvt]") 83 | { 84 | #if defined(MIPP_64BIT) 85 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(__AVX512DQ__)) 86 | #if !defined(MIPP_SSE) && !defined(MIPP_AVX) 87 | SECTION("datatype = int64_t -> double") { test_Reg_cvt(); } 88 | SECTION("datatype = double -> int64_t") { test_Reg_cvt(); } 89 | #endif 90 | #endif 91 | #endif 92 | SECTION("datatype = int32_t -> float") { test_Reg_cvt(); } 93 | SECTION("datatype = float -> int32_t") { test_Reg_cvt(); } 94 | 95 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 96 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 97 | #if defined(MIPP_64BIT) 98 | SECTION("datatype = int32_t -> int64_t") { test_Reg_cvt(); } 99 | #endif 100 | #if defined(MIPP_BW) 101 | SECTION("datatype = int16_t -> int32_t") { test_Reg_cvt(); } 102 | SECTION("datatype = int8_t -> int16_t") { test_Reg_cvt(); } 103 | #endif 104 | #endif 105 | #endif 106 | } 107 | #endif 108 | -------------------------------------------------------------------------------- /tests/src/arithmetic_operations/norm.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_norm() 11 | { 12 | T inputs1[2*mipp::N()]; 13 | 14 | std::iota(inputs1, inputs1 + 2*mipp::N(), (T)1); 15 | 16 | std::mt19937 g; 17 | std::shuffle(inputs1, inputs1 + 2*mipp::N(), g); 18 | 19 | mipp::regx2 r1; 20 | 21 | r1.val[0] = mipp::load(inputs1); 22 | r1.val[1] = mipp::load(inputs1 + mipp::N()); 23 | 24 | mipp::reg r2 = mipp::norm(r1); 25 | 26 | for (auto i = 0; i < mipp::N(); i++) 27 | { 28 | T res = inputs1[i] * inputs1[i] + 
inputs1[mipp::N() +i] * inputs1[mipp::N() +i]; 29 | 30 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1 31 | REQUIRE(mipp::get(r2, i) == Approx(res)); 32 | #else 33 | REQUIRE(mipp::get(r2, i) == res); 34 | #endif 35 | } 36 | } 37 | 38 | #ifndef MIPP_NO 39 | TEST_CASE("Complex norm - mipp::reg", "[mipp::norm]") 40 | { 41 | #if defined(MIPP_64BIT) 42 | SECTION("datatype = double") { test_reg_norm(); } 43 | #endif 44 | SECTION("datatype = float") { test_reg_norm(); } 45 | 46 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 47 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 48 | SECTION("datatype = int32_t") { test_reg_norm(); } 49 | #endif 50 | #if defined(MIPP_BW) 51 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 2) 52 | SECTION("datatype = int16_t") { test_reg_norm(); } 53 | #endif 54 | #endif 55 | // #if defined(MIPP_NEON) 56 | // SECTION("datatype = int8_t") { test_reg_norm(); } 57 | // #endif 58 | #endif 59 | } 60 | #endif 61 | 62 | template 63 | void test_Reg_norm() 64 | { 65 | T inputs1[2*mipp::N()]; 66 | 67 | std::iota(inputs1, inputs1 + 2*mipp::N(), (T)1); 68 | 69 | std::mt19937 g; 70 | std::shuffle(inputs1, inputs1 + 2*mipp::N(), g); 71 | 72 | mipp::Regx2 r1 = inputs1; 73 | mipp::Reg r2 = mipp::norm(r1); 74 | 75 | for (auto i = 0; i < mipp::N(); i++) 76 | { 77 | T res = inputs1[i] * inputs1[i] + inputs1[mipp::N() +i] * inputs1[mipp::N() +i]; 78 | 79 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1 80 | REQUIRE(r2[i] == Approx(res)); 81 | #else 82 | REQUIRE(r2[i] == res); 83 | #endif 84 | } 85 | } 86 | 87 | TEST_CASE("Complex norm - mipp::Reg", "[mipp::norm]") 88 | { 89 | #if defined(MIPP_64BIT) 90 | SECTION("datatype = double") { test_Reg_norm(); } 91 | #endif 92 | SECTION("datatype = float") { test_Reg_norm(); } 93 | 94 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 95 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 96 
| SECTION("datatype = int32_t") { test_Reg_norm(); } 97 | #endif 98 | #if defined(MIPP_BW) 99 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 2) 100 | SECTION("datatype = int16_t") { test_Reg_norm(); } 101 | #endif 102 | #endif 103 | // #if defined(MIPP_NEON) 104 | // SECTION("datatype = int8_t") { test_Reg_norm(); } 105 | // #endif 106 | #endif 107 | } 108 | -------------------------------------------------------------------------------- /tests/src/arithmetic_operations/pack.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #ifndef MIPP_NO 10 | template 11 | void test_reg_pack() 12 | { 13 | constexpr int N1 = mipp::N(); 14 | constexpr int N2 = mipp::N(); 15 | 16 | static_assert(N1 < N2, "N1 has to be smaller than N2."); 17 | 18 | T1 inputs1[N1], inputs2[N1]; 19 | std::iota(inputs1, inputs1 + N1, std::numeric_limits::max() - (T1)N1); 20 | std::iota(inputs2, inputs2 + N1, (T1)-N1/2); 21 | 22 | std::mt19937 g; 23 | std::shuffle(inputs1, inputs1 + N1, g); 24 | std::shuffle(inputs2, inputs2 + N1, g); 25 | 26 | mipp::reg r1 = mipp::load(inputs1); 27 | mipp::reg r2 = mipp::load(inputs2); 28 | mipp::reg r3 = mipp::pack(r1, r2); 29 | 30 | T1 m = (T1)std::numeric_limits::min(); 31 | T1 M = (T1)std::numeric_limits::max(); 32 | for (auto i = 0; i < N1; i++) 33 | { 34 | auto res = static_cast(std::min(std::max(inputs1[i], m), M)); 35 | REQUIRE(mipp::get(r3, i) == res); 36 | } 37 | 38 | for (auto i = 0; i < N1; i++) 39 | { 40 | auto res = static_cast(std::min(std::max(inputs2[i], m), M)); 41 | REQUIRE(mipp::get(r3, N1 +i) == res); 42 | } 43 | } 44 | #endif 45 | 46 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 47 | TEST_CASE("Pack - mipp::reg", "[mipp::pack]") 48 | { 49 | #if defined(MIPP_BW) 50 | SECTION("datatype = int32_t -> int16_t") { test_reg_pack(); } 51 | SECTION("datatype = int16_t -> int8_t") { 
test_reg_pack(); } 52 | #endif 53 | } 54 | #endif 55 | 56 | #ifndef MIPP_NO 57 | template 58 | void test_Reg_pack() 59 | { 60 | constexpr int N1 = mipp::N(); 61 | constexpr int N2 = mipp::N(); 62 | 63 | static_assert(N1 < N2, "N1 has to be smaller than N2."); 64 | 65 | T1 inputs1[N1], inputs2[N1]; 66 | std::iota(inputs1, inputs1 + N1, std::numeric_limits::max() - (T1)N1); 67 | std::iota(inputs2, inputs2 + N1, (T1)-N1/2); 68 | 69 | std::mt19937 g; 70 | std::shuffle(inputs1, inputs1 + N1, g); 71 | std::shuffle(inputs2, inputs2 + N1, g); 72 | 73 | mipp::Reg r1 = inputs1; 74 | mipp::Reg r2 = inputs2; 75 | mipp::Reg r3 = mipp::pack(r1, r2); 76 | 77 | T1 m = (T1)std::numeric_limits::min(); 78 | T1 M = (T1)std::numeric_limits::max(); 79 | for (auto i = 0; i < N1; i++) 80 | { 81 | auto res = static_cast(std::min(std::max(inputs1[i], m), M)); 82 | REQUIRE(r3[i] == res); 83 | } 84 | 85 | for (auto i = 0; i < N1; i++) 86 | { 87 | auto res = static_cast(std::min(std::max(inputs2[i], m), M)); 88 | REQUIRE(r3[N1 +i] == res); 89 | } 90 | } 91 | #endif 92 | 93 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 94 | TEST_CASE("Pack - mipp::Reg", "[mipp::pack]") 95 | { 96 | #if defined(MIPP_BW) 97 | SECTION("datatype = int32_t -> int16_t") { test_Reg_pack(); } 98 | SECTION("datatype = int16_t -> int8_t") { test_Reg_pack(); } 99 | #endif 100 | } 101 | #endif 102 | -------------------------------------------------------------------------------- /tests/src/arithmetic_operations/sat.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | template 11 | void test_reg_sat_int() 12 | { 13 | constexpr int N = mipp::N(); 14 | T inputs1[N]; 15 | std::mt19937 g; 16 | std::uniform_int_distribution dis(std::numeric_limits::min(), std::numeric_limits::max()); 17 | 18 | for (auto i = 0; i < 100; i++) 19 | { 20 | for (auto i = 0; i < N; i++) 21 | inputs1[i] = 
dis(g); 22 | 23 | mipp::reg r1 = mipp::load(inputs1); 24 | 25 | T n1 = dis(g); 26 | T n2 = dis(g); 27 | 28 | T m = std::min(n1, n2); 29 | T M = std::max(n1, n2); 30 | 31 | mipp::reg r2 = mipp::sat(r1, m, M); 32 | 33 | for (auto i = 0; i < mipp::N(); i++) 34 | { 35 | T res = std::min(std::max(inputs1[i], m), M); 36 | REQUIRE(mipp::get(r2, i) == res); 37 | } 38 | } 39 | } 40 | 41 | template 42 | void test_reg_sat_real() 43 | { 44 | constexpr int N = mipp::N(); 45 | T inputs1[N]; 46 | std::mt19937 g; 47 | std::uniform_real_distribution dis(std::numeric_limits::min(), std::numeric_limits::max()); 48 | 49 | for (auto i = 0; i < 100; i++) 50 | { 51 | for (auto i = 0; i < N; i++) 52 | inputs1[i] = dis(g); 53 | 54 | mipp::reg r1 = mipp::load(inputs1); 55 | 56 | T n1 = dis(g); 57 | T n2 = dis(g); 58 | 59 | T m = std::min(n1, n2); 60 | T M = std::max(n1, n2); 61 | 62 | mipp::reg r2 = mipp::sat(r1, m, M); 63 | 64 | for (auto i = 0; i < mipp::N(); i++) 65 | { 66 | T res = std::min(std::max(inputs1[i], m), M); 67 | REQUIRE(mipp::get(r2, i) == res); 68 | } 69 | } 70 | } 71 | 72 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 73 | TEST_CASE("Saturation - mipp::reg", "[mipp::sat]") 74 | { 75 | #if defined(MIPP_64BIT) 76 | SECTION("datatype = double") { test_reg_sat_real(); } 77 | #endif 78 | SECTION("datatype = float") { test_reg_sat_real(); } 79 | 80 | #if defined(MIPP_64BIT) 81 | #if !defined(MIPP_SSE) && !defined(MIPP_AVX) && !defined(MIPP_NEON) 82 | SECTION("datatype = int64_t") { test_reg_sat_int(); } 83 | SECTION("datatype = uint64_t") { test_reg_sat_int(); } 84 | #endif 85 | #endif 86 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 87 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 88 | SECTION("datatype = int32_t") { test_reg_sat_int(); } 89 | SECTION("datatype = uint32_t") { test_reg_sat_int(); } 90 | #endif 91 | #endif 92 | #if defined(MIPP_BW) 93 | SECTION("datatype = int16_t") { test_reg_sat_int(); } 94 | #if 
!defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 95 | SECTION("datatype = uint16_t") { test_reg_sat_int(); } 96 | #ifndef _MSC_VER 97 | SECTION("datatype = int8_t") { test_reg_sat_int(); } 98 | SECTION("datatype = uint8_t") { test_reg_sat_int(); } 99 | #endif 100 | #endif 101 | #endif 102 | } 103 | #endif 104 | 105 | template 106 | void test_Reg_sat_int() 107 | { 108 | constexpr int N = mipp::N(); 109 | T inputs1[N]; 110 | std::mt19937 g; 111 | std::uniform_int_distribution dis(std::numeric_limits::min(), std::numeric_limits::max()); 112 | 113 | for (auto i = 0; i < 100; i++) 114 | { 115 | for (auto i = 0; i < N; i++) 116 | inputs1[i] = dis(g); 117 | 118 | mipp::Reg r1 = inputs1; 119 | 120 | T n1 = dis(g); 121 | T n2 = dis(g); 122 | 123 | T m = std::min(n1, n2); 124 | T M = std::max(n1, n2); 125 | 126 | mipp::Reg r2 = mipp::sat(r1, m, M); 127 | 128 | for (auto i = 0; i < mipp::N(); i++) 129 | { 130 | T res = std::min(std::max(inputs1[i], m), M); 131 | REQUIRE(r2[i] == res); 132 | } 133 | } 134 | } 135 | 136 | template 137 | void test_Reg_sat_real() 138 | { 139 | constexpr int N = mipp::N(); 140 | T inputs1[N]; 141 | std::mt19937 g; 142 | std::uniform_real_distribution dis(std::numeric_limits::min(), std::numeric_limits::max()); 143 | 144 | for (auto i = 0; i < 100; i++) 145 | { 146 | for (auto i = 0; i < N; i++) 147 | inputs1[i] = dis(g); 148 | 149 | mipp::Reg r1 = inputs1; 150 | 151 | T n1 = dis(g); 152 | T n2 = dis(g); 153 | 154 | T m = std::min(n1, n2); 155 | T M = std::max(n1, n2); 156 | 157 | mipp::Reg r2 = mipp::sat(r1, m, M); 158 | 159 | for (auto i = 0; i < mipp::N(); i++) 160 | { 161 | T res = std::min(std::max(inputs1[i], m), M); 162 | REQUIRE(r2[i] == res); 163 | } 164 | } 165 | } 166 | 167 | #if !defined(MIPP_SVE_LS) 168 | TEST_CASE("Saturation - mipp::Reg", "[mipp::sat]") 169 | { 170 | #if defined(MIPP_64BIT) 171 | SECTION("datatype = double") { test_Reg_sat_real(); } 172 | #endif 173 | SECTION("datatype = float") { 
test_Reg_sat_real(); } 174 | 175 | #if defined(MIPP_64BIT) 176 | #if !defined(MIPP_SSE) && !defined(MIPP_AVX) && !defined(MIPP_NEON) 177 | SECTION("datatype = int64_t") { test_Reg_sat_int(); } 178 | SECTION("datatype = uint64_t") { test_Reg_sat_int(); } 179 | #endif 180 | #endif 181 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 182 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 183 | SECTION("datatype = int32_t") { test_Reg_sat_int(); } 184 | SECTION("datatype = uint32_t") { test_Reg_sat_int(); } 185 | #endif 186 | #endif 187 | #if defined(MIPP_BW) 188 | SECTION("datatype = int16_t") { test_Reg_sat_int(); } 189 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 190 | SECTION("datatype = uint16_t") { test_Reg_sat_int(); } 191 | #ifndef _MSC_VER 192 | SECTION("datatype = int8_t") { test_Reg_sat_int(); } 193 | SECTION("datatype = uint8_t") { test_Reg_sat_int(); } 194 | #endif 195 | #endif 196 | #endif 197 | } 198 | #endif 199 | -------------------------------------------------------------------------------- /tests/src/arithmetic_operations/sign.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_sign() 11 | { 12 | constexpr int N = mipp::N(); 13 | T inputs1[N]; 14 | std::iota(inputs1, inputs1 + N, (T)-N/2); 15 | 16 | std::mt19937 g; 17 | std::shuffle(inputs1, inputs1 + N, g); 18 | 19 | mipp::reg r1 = mipp::load(inputs1); 20 | mipp::msk m = mipp::sign(r1); 21 | mipp::reg r2 = mipp::toreg(m); 22 | 23 | for (auto i = 0; i < N; i++) 24 | { 25 | if (inputs1[i] >= 0) 26 | REQUIRE(mipp::get(r2, i) == (T)0); 27 | else 28 | REQUIRE(mipp::get(r2, i) != (T)0); 29 | } 30 | } 31 | 32 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 33 | TEST_CASE("Sign - mipp::reg", "[mipp::sign]") 34 | { 35 | #if defined(MIPP_64BIT) 36 | SECTION("datatype = 
double") { test_reg_sign(); } 37 | #endif 38 | SECTION("datatype = float") { test_reg_sign(); } 39 | 40 | #if defined(MIPP_64BIT) 41 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 42 | #if !defined(MIPP_SSE) 43 | SECTION("datatype = int64_t") { test_reg_sign(); } 44 | #endif 45 | #endif 46 | #endif 47 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 48 | SECTION("datatype = int32_t") { test_reg_sign(); } 49 | #endif 50 | #if defined(MIPP_BW) 51 | SECTION("datatype = int16_t") { test_reg_sign(); } 52 | SECTION("datatype = int8_t") { test_reg_sign(); } 53 | #endif 54 | } 55 | #endif 56 | 57 | template 58 | void test_Reg_sign() 59 | { 60 | constexpr int N = mipp::N(); 61 | T inputs1[N]; 62 | std::iota(inputs1, inputs1 + N, (T)-N/2); 63 | 64 | std::mt19937 g; 65 | std::shuffle(inputs1, inputs1 + N, g); 66 | 67 | mipp::Reg r1 = inputs1; 68 | mipp::Msk m = mipp::sign(r1); 69 | mipp::Reg r2 = mipp::toreg(m.m); 70 | 71 | for (auto i = 0; i < N; i++) 72 | { 73 | if (inputs1[i] >= 0) 74 | REQUIRE(r2[i] == (T)0); 75 | else 76 | REQUIRE(r2[i] != (T)0); 77 | } 78 | } 79 | 80 | #if !defined(MIPP_SVE_LS) 81 | TEST_CASE("Sign - mipp::Reg", "[mipp::sign]") 82 | { 83 | #if defined(MIPP_64BIT) 84 | SECTION("datatype = double") { test_Reg_sign(); } 85 | #endif 86 | SECTION("datatype = float") { test_Reg_sign(); } 87 | 88 | #if defined(MIPP_64BIT) 89 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 90 | #if !defined(MIPP_SSE) 91 | SECTION("datatype = int64_t") { test_Reg_sign(); } 92 | #endif 93 | #endif 94 | #endif 95 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 96 | SECTION("datatype = int32_t") { test_Reg_sign(); } 97 | #endif 98 | #if defined(MIPP_BW) 99 | SECTION("datatype = int16_t") { test_Reg_sign(); } 100 | SECTION("datatype = int8_t") { test_Reg_sign(); } 101 | #endif 102 | } 103 | #endif 104 | 
-------------------------------------------------------------------------------- /tests/src/bitwise_operations/andb.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_andb() 11 | { 12 | T inputs1[mipp::N()], inputs2[mipp::N()]; 13 | std::iota(inputs1, inputs1 + mipp::N(), (T)0); 14 | std::iota(inputs2, inputs2 + mipp::N(), (T)0); 15 | 16 | std::mt19937 g; 17 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 18 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 19 | 20 | mipp::reg r1 = mipp::load(inputs1); 21 | mipp::reg r2 = mipp::load(inputs2); 22 | mipp::reg r3 = mipp::andb(r1, r2); 23 | 24 | for (auto i = 0; i < mipp::N(); i++) 25 | { 26 | T res = inputs1[i] & inputs2[i]; 27 | REQUIRE(mipp::get(r3, i) == res); 28 | } 29 | } 30 | 31 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 32 | TEST_CASE("Binary and - mipp::reg", "[mipp::andb]") 33 | { 34 | #if defined(MIPP_64BIT) 35 | SECTION("datatype = int64_t") { test_reg_andb(); } 36 | #endif 37 | SECTION("datatype = int32_t") { test_reg_andb(); } 38 | #if defined(MIPP_BW) 39 | SECTION("datatype = int16_t") { test_reg_andb(); } 40 | SECTION("datatype = int8_t") { test_reg_andb(); } 41 | #endif 42 | } 43 | #endif 44 | 45 | template 46 | void test_Reg_andb() 47 | { 48 | T inputs1[mipp::N()], inputs2[mipp::N()]; 49 | std::iota(inputs1, inputs1 + mipp::N(), (T)0); 50 | std::iota(inputs2, inputs2 + mipp::N(), (T)0); 51 | 52 | std::mt19937 g; 53 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 54 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 55 | 56 | mipp::Reg r1 = inputs1; 57 | mipp::Reg r2 = inputs2; 58 | mipp::Reg r3 = r1 & r2; 59 | 60 | for (auto i = 0; i < mipp::N(); i++) 61 | { 62 | T res = inputs1[i] & inputs2[i]; 63 | REQUIRE(r3[i] == res); 64 | } 65 | } 66 | 67 | #if !defined(MIPP_SVE_LS) 68 | TEST_CASE("Binary and - mipp::Reg", "[mipp::andb]") 69 | { 70 
| #if defined(MIPP_64BIT) 71 | SECTION("datatype = int64_t") { test_Reg_andb(); } 72 | #endif 73 | SECTION("datatype = int32_t") { test_Reg_andb(); } 74 | #if defined(MIPP_BW) 75 | SECTION("datatype = int16_t") { test_Reg_andb(); } 76 | SECTION("datatype = int8_t") { test_Reg_andb(); } 77 | #endif 78 | } 79 | #endif 80 | 81 | template 82 | void test_msk_andb() 83 | { 84 | constexpr int N = mipp::N(); 85 | bool inputs1[N], inputs2[N]; 86 | std::mt19937 g; 87 | std::uniform_int_distribution dis(0, 1); 88 | 89 | for (auto t = 0; t < 100; t++) 90 | { 91 | for (auto i = 0; i < N; i++) 92 | { 93 | inputs1[i] = dis(g) ? true : false; 94 | inputs2[i] = dis(g) ? true : false; 95 | } 96 | 97 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 98 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 99 | 100 | mipp::msk m1 = mipp::set(inputs1); 101 | mipp::msk m2 = mipp::set(inputs2); 102 | mipp::msk m3 = mipp::andb(m1, m2); 103 | 104 | mipp::reg r = mipp::toreg(m3); 105 | 106 | for (auto i = 0; i < N; i++) 107 | { 108 | bool res = inputs1[i] & inputs2[i]; 109 | 110 | if (res) 111 | REQUIRE(mipp::get(r, i) != (T)0); 112 | else 113 | REQUIRE(mipp::get(r, i) == (T)res); 114 | } 115 | } 116 | } 117 | 118 | #ifndef MIPP_NO 119 | TEST_CASE("Binary and - mipp::msk", "[mipp::andb]") 120 | { 121 | #if defined(MIPP_64BIT) 122 | SECTION("datatype = int64_t") { test_msk_andb(); } 123 | #endif 124 | SECTION("datatype = int32_t") { test_msk_andb(); } 125 | #if defined(MIPP_BW) 126 | SECTION("datatype = int16_t") { test_msk_andb(); } 127 | SECTION("datatype = int8_t") { test_msk_andb(); } 128 | #endif 129 | } 130 | #endif 131 | 132 | template 133 | void test_Msk_andb() 134 | { 135 | constexpr int N = mipp::N(); 136 | bool inputs1[N], inputs2[N]; 137 | std::mt19937 g; 138 | std::uniform_int_distribution dis(0, 1); 139 | 140 | for (auto t = 0; t < 100; t++) 141 | { 142 | for (auto i = 0; i < N; i++) 143 | { 144 | inputs1[i] = dis(g) ? true : false; 145 | inputs2[i] = dis(g) ? 
true : false; 146 | } 147 | 148 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 149 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 150 | 151 | mipp::Msk m1 = inputs1; 152 | mipp::Msk m2 = inputs2; 153 | mipp::Msk m3 = m1 & m2; 154 | 155 | for (auto i = 0; i < N; i++) 156 | { 157 | bool res = inputs1[i] & inputs2[i]; 158 | REQUIRE(m3[i] == res); 159 | } 160 | } 161 | } 162 | 163 | TEST_CASE("Binary and - mipp::Msk", "[mipp::andb]") 164 | { 165 | #if defined(MIPP_64BIT) 166 | SECTION("datatype = int64_t") { test_Msk_andb(); } 167 | #endif 168 | SECTION("datatype = int32_t") { test_Msk_andb(); } 169 | #if defined(MIPP_BW) 170 | SECTION("datatype = int16_t") { test_Msk_andb(); } 171 | SECTION("datatype = int8_t") { test_Msk_andb(); } 172 | #endif 173 | } 174 | -------------------------------------------------------------------------------- /tests/src/bitwise_operations/andnb.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_andnb() 11 | { 12 | T inputs1[mipp::N()], inputs2[mipp::N()]; 13 | std::iota(inputs1, inputs1 + mipp::N(), (T)0); 14 | std::iota(inputs2, inputs2 + mipp::N(), (T)0); 15 | 16 | std::mt19937 g; 17 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 18 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 19 | 20 | mipp::reg r1 = mipp::load(inputs1); 21 | mipp::reg r2 = mipp::load(inputs2); 22 | mipp::reg r3 = mipp::andnb(r1, r2); 23 | 24 | for (auto i = 0; i < mipp::N(); i++) 25 | { 26 | T res = ~inputs1[i] & inputs2[i]; 27 | REQUIRE(mipp::get(r3, i) == res); 28 | } 29 | } 30 | 31 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 32 | TEST_CASE("Binary not and - mipp::reg", "[mipp::andnb]") 33 | { 34 | #if defined(MIPP_64BIT) 35 | SECTION("datatype = int64_t") { test_reg_andnb(); } 36 | #endif 37 | SECTION("datatype = int32_t") { test_reg_andnb(); } 38 | #if defined(MIPP_BW) 39 | SECTION("datatype 
= int16_t") { test_reg_andnb(); } 40 | SECTION("datatype = int8_t") { test_reg_andnb(); } 41 | #endif 42 | } 43 | #endif 44 | 45 | template 46 | void test_Reg_andnb() 47 | { 48 | T inputs1[mipp::N()], inputs2[mipp::N()]; 49 | std::iota(inputs1, inputs1 + mipp::N(), (T)0); 50 | std::iota(inputs2, inputs2 + mipp::N(), (T)0); 51 | 52 | std::mt19937 g; 53 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 54 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 55 | 56 | mipp::Reg r1 = inputs1; 57 | mipp::Reg r2 = inputs2; 58 | mipp::Reg r3 = mipp::andnb(r1, r2); 59 | 60 | for (auto i = 0; i < mipp::N(); i++) 61 | { 62 | T res = ~inputs1[i] & inputs2[i]; 63 | REQUIRE(r3[i] == res); 64 | } 65 | } 66 | 67 | #if !defined(MIPP_SVE_LS) 68 | TEST_CASE("Binary not and - mipp::Reg", "[mipp::andnb]") 69 | { 70 | #if defined(MIPP_64BIT) 71 | SECTION("datatype = int64_t") { test_Reg_andnb(); } 72 | #endif 73 | SECTION("datatype = int32_t") { test_Reg_andnb(); } 74 | #if defined(MIPP_BW) 75 | SECTION("datatype = int16_t") { test_Reg_andnb(); } 76 | SECTION("datatype = int8_t") { test_Reg_andnb(); } 77 | #endif 78 | } 79 | #endif 80 | 81 | template 82 | void test_msk_andnb() 83 | { 84 | constexpr int N = mipp::N(); 85 | bool inputs1[N], inputs2[N]; 86 | std::mt19937 g; 87 | std::uniform_int_distribution dis(0, 1); 88 | 89 | for (auto t = 0; t < 100; t++) 90 | { 91 | for (auto i = 0; i < N; i++) 92 | { 93 | inputs1[i] = dis(g) ? true : false; 94 | inputs2[i] = dis(g) ? 
true : false; 95 | } 96 | 97 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 98 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 99 | 100 | mipp::msk m1 = mipp::set(inputs1); 101 | mipp::msk m2 = mipp::set(inputs2); 102 | mipp::msk m3 = mipp::andnb(m1, m2); 103 | 104 | mipp::reg r = mipp::toreg(m3); 105 | 106 | for (auto i = 0; i < N; i++) 107 | { 108 | bool res = !inputs1[i] & inputs2[i]; 109 | 110 | if (res) 111 | REQUIRE(mipp::get(r, i) != (T)0); 112 | else 113 | REQUIRE(mipp::get(r, i) == (T)res); 114 | } 115 | } 116 | } 117 | 118 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 119 | TEST_CASE("Binary not and - mipp::msk", "[mipp::andnb]") 120 | { 121 | #if defined(MIPP_64BIT) 122 | SECTION("datatype = int64_t") { test_msk_andnb(); } 123 | #endif 124 | SECTION("datatype = int32_t") { test_msk_andnb(); } 125 | #if defined(MIPP_BW) 126 | SECTION("datatype = int16_t") { test_msk_andnb(); } 127 | SECTION("datatype = int8_t") { test_msk_andnb(); } 128 | #endif 129 | } 130 | #endif 131 | 132 | template 133 | void test_Msk_andnb() 134 | { 135 | constexpr int N = mipp::N(); 136 | bool inputs1[N], inputs2[N]; 137 | std::mt19937 g; 138 | std::uniform_int_distribution dis(0, 1); 139 | 140 | for (auto t = 0; t < 100; t++) 141 | { 142 | for (auto i = 0; i < N; i++) 143 | { 144 | inputs1[i] = dis(g) ? true : false; 145 | inputs2[i] = dis(g) ? 
true : false; 146 | } 147 | 148 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 149 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 150 | 151 | mipp::Msk m1 = inputs1; 152 | mipp::Msk m2 = inputs2; 153 | mipp::Msk m3 = mipp::andnb(m1, m2); 154 | 155 | for (auto i = 0; i < N; i++) 156 | { 157 | bool res = !inputs1[i] & inputs2[i]; 158 | REQUIRE(m3[i] == res); 159 | } 160 | } 161 | } 162 | 163 | #if !defined(MIPP_SVE_LS) 164 | TEST_CASE("Binary not and - mipp::Msk", "[mipp::andnb]") 165 | { 166 | #if defined(MIPP_64BIT) 167 | SECTION("datatype = int64_t") { test_Msk_andnb(); } 168 | #endif 169 | SECTION("datatype = int32_t") { test_Msk_andnb(); } 170 | #if defined(MIPP_BW) 171 | SECTION("datatype = int16_t") { test_Msk_andnb(); } 172 | SECTION("datatype = int8_t") { test_Msk_andnb(); } 173 | #endif 174 | } 175 | #endif 176 | -------------------------------------------------------------------------------- /tests/src/bitwise_operations/lshiftr.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_lshiftr() 11 | { 12 | T inputs1[mipp::N()]; 13 | T inputs2[mipp::N()]; 14 | std::iota(inputs1, inputs1 + mipp::N(), (T)0); 15 | std::iota(inputs2, inputs2 + mipp::N(), (T)0); 16 | 17 | std::mt19937 g; 18 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 19 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 20 | 21 | mipp::reg r1 = mipp::load(inputs1); 22 | mipp::reg r2 = mipp::load(inputs2); 23 | mipp::reg r3 = mipp::lshiftr(r1, r2); 24 | 25 | for (auto i = 0; i < mipp::N(); i++) 26 | { 27 | T res = inputs1[i] << inputs2[i]; 28 | REQUIRE(mipp::get(r3, i) == res); 29 | } 30 | } 31 | 32 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 33 | TEST_CASE("Binary left shift (register) - mipp::reg", "[mipp::lshiftr]") 34 | { 35 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 36 | #if defined(MIPP_64BIT) 
37 | SECTION("datatype = int64_t") { test_reg_lshiftr(); } 38 | #endif 39 | SECTION("datatype = int32_t") { test_reg_lshiftr(); } 40 | #if defined(MIPP_BW) 41 | #if !defined(MIPP_AVX) 42 | SECTION("datatype = int16_t") { test_reg_lshiftr(); } 43 | #if !defined(MIPP_AVX512) 44 | SECTION("datatype = int8_t") { test_reg_lshiftr(); } 45 | #endif 46 | #endif 47 | #endif 48 | #endif 49 | } 50 | #endif 51 | 52 | template 53 | void test_Reg_lshiftr() 54 | { 55 | T inputs1[mipp::N()]; 56 | T inputs2[mipp::N()]; 57 | std::iota(inputs1, inputs1 + mipp::N(), (T)0); 58 | std::iota(inputs2, inputs2 + mipp::N(), (T)0); 59 | 60 | std::mt19937 g; 61 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 62 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 63 | 64 | mipp::Reg r1 = inputs1; 65 | mipp::Reg r2 = inputs2; 66 | mipp::Reg r3 = r1 << r2; 67 | 68 | for (auto i = 0; i < mipp::N(); i++) 69 | { 70 | T res = inputs1[i] << inputs2[i]; 71 | REQUIRE(r3[i] == res); 72 | } 73 | } 74 | 75 | #if !defined(MIPP_SVE_LS) 76 | TEST_CASE("Binary left shift (register) - mipp::Reg", "[mipp::lshiftr]") 77 | { 78 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 79 | #if defined(MIPP_64BIT) 80 | SECTION("datatype = int64_t") { test_Reg_lshiftr(); } 81 | #endif 82 | SECTION("datatype = int32_t") { test_Reg_lshiftr(); } 83 | #if defined(MIPP_BW) 84 | #if !defined(MIPP_AVX) 85 | SECTION("datatype = int16_t") { test_Reg_lshiftr(); } 86 | #if !defined(MIPP_AVX512) 87 | SECTION("datatype = int8_t") { test_Reg_lshiftr(); } 88 | #endif 89 | #endif 90 | #endif 91 | #endif 92 | } 93 | #endif -------------------------------------------------------------------------------- /tests/src/bitwise_operations/notb.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_notb() 11 | { 12 | T inputs1[mipp::N()]; 13 | std::iota(inputs1, inputs1 
+ mipp::N(), (T)0); 14 | 15 | std::mt19937 g; 16 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 17 | 18 | mipp::reg r1 = mipp::load(inputs1); 19 | mipp::reg r2 = mipp::notb(r1); 20 | 21 | for (auto i = 0; i < mipp::N(); i++) 22 | { 23 | T res = ~inputs1[i]; 24 | REQUIRE(mipp::get(r2, i) == res); 25 | } 26 | } 27 | 28 | /* Catch2 registration for the low-level NOT test; compiled only when neither MIPP_NO nor MIPP_SVE_LS is defined. The int64_t section needs MIPP_64BIT, the int16_t and int8_t sections need MIPP_BW. */ #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 29 | TEST_CASE("Binary not - mipp::reg", "[mipp::notb]") 30 | { 31 | #if defined(MIPP_64BIT) 32 | SECTION("datatype = int64_t") { test_reg_notb(); } 33 | #endif 34 | SECTION("datatype = int32_t") { test_reg_notb(); } 35 | #if defined(MIPP_BW) 36 | SECTION("datatype = int16_t") { test_reg_notb(); } 37 | SECTION("datatype = int8_t") { test_reg_notb(); } 38 | #endif 39 | } 40 | #endif 41 | 42 | /* Bitwise NOT through the mipp::Reg wrapper: same shuffled 0,1,2,... input, but the register is built by assignment from the array and negated with operator~; r2[i] must equal ~inputs1[i]. */ template 43 | void test_Reg_notb() 44 | { 45 | T inputs1[mipp::N()]; 46 | std::iota(inputs1, inputs1 + mipp::N(), (T)0); 47 | 48 | std::mt19937 g; 49 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 50 | 51 | mipp::Reg r1 = inputs1; 52 | mipp::Reg r2 = ~r1; 53 | 54 | for (auto i = 0; i < mipp::N(); i++) 55 | { 56 | T res = ~inputs1[i]; 57 | REQUIRE(r2[i] == res); 58 | } 59 | } 60 | 61 | /* Catch2 registration for the Reg-wrapper NOT test; compiled out only when MIPP_SVE_LS is defined (there is no MIPP_NO guard here). */ #if !defined(MIPP_SVE_LS) 62 | TEST_CASE("Binary not - mipp::Reg", "[mipp::notb]") 63 | { 64 | #if defined(MIPP_64BIT) 65 | SECTION("datatype = int64_t") { test_Reg_notb(); } 66 | #endif 67 | SECTION("datatype = int32_t") { test_Reg_notb(); } 68 | #if defined(MIPP_BW) 69 | SECTION("datatype = int16_t") { test_Reg_notb(); } 70 | SECTION("datatype = int8_t") { test_Reg_notb(); } 71 | #endif 72 | } 73 | #endif 74 | 75 | /* Mask NOT through the low-level API, repeated 100 times: a bool array is filled from uniform_int_distribution(0, 1), shuffled, turned into a mask with mipp::set, negated with mipp::notb and expanded with mipp::toreg. Lanes where !inputs1[i] is true must be non-zero; the other lanes must compare equal to (T)res, i.e. zero. */ template 76 | void test_msk_notb() 77 | { 78 | constexpr int N = mipp::N(); 79 | bool inputs1[N]; 80 | std::mt19937 g; 81 | std::uniform_int_distribution dis(0, 1); 82 | 83 | for (auto t = 0; t < 100; t++) 84 | { 85 | for (auto i = 0; i < N; i++) 86 | inputs1[i] = dis(g) ?
true : false; 87 | 88 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 89 | 90 | mipp::msk m1 = mipp::set(inputs1); 91 | mipp::msk m2 = mipp::notb(m1); 92 | 93 | mipp::reg r = mipp::toreg(m2); 94 | 95 | for (auto i = 0; i < N; i++) 96 | { 97 | bool res = !inputs1[i]; 98 | 99 | if (res) 100 | REQUIRE(mipp::get(r, i) != (T)0); 101 | else 102 | REQUIRE(mipp::get(r, i) == (T)res); 103 | } 104 | } 105 | } 106 | 107 | /* Catch2 registration for the low-level mask NOT test; guarded by MIPP_NO only (no MIPP_SVE_LS check on this case). */ #ifndef MIPP_NO 108 | TEST_CASE("Binary not - mipp::msk", "[mipp::notb]") 109 | { 110 | #if defined(MIPP_64BIT) 111 | SECTION("datatype = int64_t") { test_msk_notb(); } 112 | #endif 113 | SECTION("datatype = int32_t") { test_msk_notb(); } 114 | #if defined(MIPP_BW) 115 | SECTION("datatype = int16_t") { test_msk_notb(); } 116 | SECTION("datatype = int8_t") { test_msk_notb(); } 117 | #endif 118 | } 119 | #endif 120 | 121 | /* Mask NOT through the mipp::Msk wrapper, repeated 100 times: random bool inputs (uniform_int_distribution(0, 1), then shuffled) are assigned to a Msk, negated with operator~, and each m2[i] must equal !inputs1[i]. */ template 122 | void test_Msk_notb() 123 | { 124 | constexpr int N = mipp::N(); 125 | bool inputs1[N]; 126 | std::mt19937 g; 127 | std::uniform_int_distribution dis(0, 1); 128 | 129 | for (auto t = 0; t < 100; t++) 130 | { 131 | for (auto i = 0; i < N; i++) 132 | inputs1[i] = dis(g) ?
true : false; 133 | 134 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 135 | 136 | mipp::Msk m1 = inputs1; 137 | mipp::Msk m2 = ~m1; 138 | 139 | for (auto i = 0; i < N; i++) 140 | { 141 | bool res = !inputs1[i]; 142 | REQUIRE(m2[i] == res); 143 | } 144 | } 145 | } 146 | 147 | /* Catch2 registration for the Msk-wrapper NOT test. Unlike the other cases in this file it is not wrapped in any MIPP_NO / MIPP_SVE_LS guard; only the per-type sections are conditional (MIPP_64BIT, MIPP_BW). */ TEST_CASE("Binary not - mipp::Msk", "[mipp::notb]") 148 | { 149 | #if defined(MIPP_64BIT) 150 | SECTION("datatype = int64_t") { test_Msk_notb(); } 151 | #endif 152 | SECTION("datatype = int32_t") { test_Msk_notb(); } 153 | #if defined(MIPP_BW) 154 | SECTION("datatype = int16_t") { test_Msk_notb(); } 155 | SECTION("datatype = int8_t") { test_Msk_notb(); } 156 | #endif 157 | } -------------------------------------------------------------------------------- /tests/src/bitwise_operations/orb.cpp: -------------------------------------------------------------------------------- 1 | /* NOTE(review): the include targets and the template argument lists appear to be missing from this listing - confirm against the original file. */ #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /* Bitwise OR through the low-level API: two arrays filled with 0,1,2,... are shuffled one after the other with the same default-constructed std::mt19937, loaded with mipp::load and combined with mipp::orb; every lane read with mipp::get must equal inputs1[i] | inputs2[i]. */ template 10 | void test_reg_orb() 11 | { 12 | T inputs1[mipp::N()], inputs2[mipp::N()]; 13 | std::iota(inputs1, inputs1 + mipp::N(), (T)0); 14 | std::iota(inputs2, inputs2 + mipp::N(), (T)0); 15 | 16 | std::mt19937 g; 17 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 18 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 19 | 20 | mipp::reg r1 = mipp::load(inputs1); 21 | mipp::reg r2 = mipp::load(inputs2); 22 | mipp::reg r3 = mipp::orb(r1, r2); 23 | 24 | for (auto i = 0; i < mipp::N(); i++) 25 | { 26 | T res = inputs1[i] | inputs2[i]; 27 | REQUIRE(mipp::get(r3, i) == res); 28 | } 29 | } 30 | 31 | /* Catch2 registration for the low-level OR test; compiled only when neither MIPP_NO nor MIPP_SVE_LS is defined. */ #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 32 | TEST_CASE("Binary or - mipp::reg", "[mipp::orb]") 33 | { 34 | #if defined(MIPP_64BIT) 35 | SECTION("datatype = int64_t") { test_reg_orb(); } 36 | #endif 37 | SECTION("datatype = int32_t") { test_reg_orb(); } 38 | #if defined(MIPP_BW) 39 | SECTION("datatype = int16_t") { test_reg_orb(); } 40 | SECTION("datatype = int8_t") { test_reg_orb(); } 41 | #endif 42 | } 43 | #endif 44 | 45 |
/* Bitwise OR through the mipp::Reg wrapper: the two shuffled 0,1,2,... arrays are assigned to Reg objects and combined with operator|; r3[i] must equal inputs1[i] | inputs2[i]. */ template 46 | void test_Reg_orb() 47 | { 48 | T inputs1[mipp::N()], inputs2[mipp::N()]; 49 | std::iota(inputs1, inputs1 + mipp::N(), (T)0); 50 | std::iota(inputs2, inputs2 + mipp::N(), (T)0); 51 | 52 | std::mt19937 g; 53 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 54 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 55 | 56 | mipp::Reg r1 = inputs1; 57 | mipp::Reg r2 = inputs2; 58 | mipp::Reg r3 = r1 | r2; 59 | 60 | for (auto i = 0; i < mipp::N(); i++) 61 | { 62 | T res = inputs1[i] | inputs2[i]; 63 | REQUIRE(r3[i] == res); 64 | } 65 | } 66 | 67 | /* Catch2 registration for the Reg-wrapper OR test; compiled out only when MIPP_SVE_LS is defined. */ #if !defined(MIPP_SVE_LS) 68 | TEST_CASE("Binary or - mipp::Reg", "[mipp::orb]") 69 | { 70 | #if defined(MIPP_64BIT) 71 | SECTION("datatype = int64_t") { test_Reg_orb(); } 72 | #endif 73 | SECTION("datatype = int32_t") { test_Reg_orb(); } 74 | #if defined(MIPP_BW) 75 | SECTION("datatype = int16_t") { test_Reg_orb(); } 76 | SECTION("datatype = int8_t") { test_Reg_orb(); } 77 | #endif 78 | } 79 | #endif 80 | 81 | /* Mask OR through the low-level API, repeated 100 times: two bool arrays are filled from uniform_int_distribution(0, 1), shuffled, converted with mipp::set, combined with mipp::orb and expanded with mipp::toreg. Lanes where inputs1[i] | inputs2[i] is true must be non-zero; the other lanes must compare equal to (T)res, i.e. zero. */ template 82 | void test_msk_orb() 83 | { 84 | constexpr int N = mipp::N(); 85 | bool inputs1[N], inputs2[N]; 86 | std::mt19937 g; 87 | std::uniform_int_distribution dis(0, 1); 88 | 89 | for (auto t = 0; t < 100; t++) 90 | { 91 | for (auto i = 0; i < N; i++) 92 | { 93 | inputs1[i] = dis(g) ? true : false; 94 | inputs2[i] = dis(g) ?
true : false; 95 | } 96 | 97 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 98 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 99 | 100 | mipp::msk m1 = mipp::set(inputs1); 101 | mipp::msk m2 = mipp::set(inputs2); 102 | mipp::msk m3 = mipp::orb(m1, m2); 103 | 104 | mipp::reg r = mipp::toreg(m3); 105 | 106 | for (auto i = 0; i < N; i++) 107 | { 108 | bool res = inputs1[i] | inputs2[i]; 109 | 110 | if (res) 111 | REQUIRE(mipp::get(r, i) != (T)0); 112 | else 113 | REQUIRE(mipp::get(r, i) == (T)res); 114 | } 115 | } 116 | } 117 | 118 | /* Catch2 registration for the low-level mask OR test; compiled only when neither MIPP_NO nor MIPP_SVE_LS is defined. */ #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 119 | TEST_CASE("Binary or - mipp::msk", "[mipp::orb]") 120 | { 121 | #if defined(MIPP_64BIT) 122 | SECTION("datatype = int64_t") { test_msk_orb(); } 123 | #endif 124 | SECTION("datatype = int32_t") { test_msk_orb(); } 125 | #if defined(MIPP_BW) 126 | SECTION("datatype = int16_t") { test_msk_orb(); } 127 | SECTION("datatype = int8_t") { test_msk_orb(); } 128 | #endif 129 | } 130 | #endif 131 | 132 | /* Mask OR through the mipp::Msk wrapper, repeated 100 times: random bool inputs are assigned to two Msk objects, combined with operator|, and each m3[i] must equal inputs1[i] | inputs2[i]. */ template 133 | void test_Msk_orb() 134 | { 135 | constexpr int N = mipp::N(); 136 | bool inputs1[N], inputs2[N]; 137 | std::mt19937 g; 138 | std::uniform_int_distribution dis(0, 1); 139 | 140 | for (auto t = 0; t < 100; t++) 141 | { 142 | for (auto i = 0; i < N; i++) 143 | { 144 | inputs1[i] = dis(g) ? true : false; 145 | inputs2[i] = dis(g) ?
true : false; 146 | } 147 | 148 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 149 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 150 | 151 | mipp::Msk m1 = inputs1; 152 | mipp::Msk m2 = inputs2; 153 | mipp::Msk m3 = m1 | m2; 154 | 155 | for (auto i = 0; i < N; i++) 156 | { 157 | bool res = inputs1[i] | inputs2[i]; 158 | REQUIRE(m3[i] == res); 159 | } 160 | } 161 | } 162 | 163 | /* Catch2 registration for the Msk-wrapper OR test; compiled out only when MIPP_SVE_LS is defined. */ #if !defined(MIPP_SVE_LS) 164 | TEST_CASE("Binary or - mipp::Msk", "[mipp::orb]") 165 | { 166 | #if defined(MIPP_64BIT) 167 | SECTION("datatype = int64_t") { test_Msk_orb(); } 168 | #endif 169 | SECTION("datatype = int32_t") { test_Msk_orb(); } 170 | #if defined(MIPP_BW) 171 | SECTION("datatype = int16_t") { test_Msk_orb(); } 172 | SECTION("datatype = int8_t") { test_Msk_orb(); } 173 | #endif 174 | } 175 | #endif 176 | -------------------------------------------------------------------------------- /tests/src/bitwise_operations/rshiftr.cpp: -------------------------------------------------------------------------------- 1 | /* NOTE(review): the include targets and the template argument lists appear to be missing from this listing - confirm against the original file. */ #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /* Per-lane right shift through the low-level API: two arrays filled with 0,1,2,... are shuffled one after the other with the same default-constructed std::mt19937, loaded with mipp::load and combined with mipp::rshiftr; every lane read with mipp::get must equal inputs1[i] >> inputs2[i]. */ template 10 | void test_reg_rshiftr() 11 | { 12 | T inputs1[mipp::N()]; 13 | T inputs2[mipp::N()]; 14 | std::iota(inputs1, inputs1 + mipp::N(), (T)0); 15 | std::iota(inputs2, inputs2 + mipp::N(), (T)0); 16 | 17 | std::mt19937 g; 18 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 19 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 20 | 21 | mipp::reg r1 = mipp::load(inputs1); 22 | mipp::reg r2 = mipp::load(inputs2); 23 | mipp::reg r3 = mipp::rshiftr(r1, r2); 24 | 25 | for (auto i = 0; i < mipp::N(); i++) 26 | { 27 | T res = inputs1[i] >> inputs2[i]; 28 | REQUIRE(mipp::get(r3, i) == res); 29 | } 30 | } 31 | 32 | /* Catch2 registration for the low-level right-shift test; compiled only when neither MIPP_NO nor MIPP_SVE_LS is defined, and all sections are compiled out for MIPP_AVX with MIPP_INSTR_VERSION < 2. */ #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 33 | TEST_CASE("Binary right shift (register) - mipp::reg", "[mipp::rshiftr]") 34 | { 35 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 36 | #if defined(MIPP_64BIT) 37 | SECTION("datatype =
int64_t") { test_reg_rshiftr(); } 38 | #endif 39 | SECTION("datatype = int32_t") { test_reg_rshiftr(); } 40 | #if defined(MIPP_BW) 41 | #if !defined(MIPP_AVX) 42 | SECTION("datatype = int16_t") { test_reg_rshiftr(); } 43 | #if !defined(MIPP_AVX512) 44 | SECTION("datatype = int8_t") { test_reg_rshiftr(); } 45 | #endif 46 | #endif 47 | #endif 48 | #endif 49 | } 50 | #endif 51 | 52 | /* Per-lane right shift through the mipp::Reg wrapper: the two shuffled 0,1,2,... arrays are assigned to Reg objects and combined with operator>>; r3[i] must equal inputs1[i] >> inputs2[i]. */ template 53 | void test_Reg_rshiftr() 54 | { 55 | T inputs1[mipp::N()]; 56 | T inputs2[mipp::N()]; 57 | std::iota(inputs1, inputs1 + mipp::N(), (T)0); 58 | std::iota(inputs2, inputs2 + mipp::N(), (T)0); 59 | 60 | std::mt19937 g; 61 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 62 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 63 | 64 | mipp::Reg r1 = inputs1; 65 | mipp::Reg r2 = inputs2; 66 | mipp::Reg r3 = r1 >> r2; 67 | 68 | for (auto i = 0; i < mipp::N(); i++) 69 | { 70 | T res = inputs1[i] >> inputs2[i]; 71 | REQUIRE(r3[i] == res); 72 | } 73 | } 74 | 75 | /* Catch2 registration for the Reg-wrapper right-shift test. Compiled out when MIPP_SVE_LS is defined; all sections are compiled out for MIPP_AVX with MIPP_INSTR_VERSION < 2; the int16_t section needs MIPP_BW without MIPP_AVX, and the int8_t section additionally needs MIPP_AVX512 to be undefined. */ #if !defined(MIPP_SVE_LS) 76 | TEST_CASE("Binary right shift (register) - mipp::Reg", "[mipp::rshiftr]") 77 | { 78 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 79 | #if defined(MIPP_64BIT) 80 | SECTION("datatype = int64_t") { test_Reg_rshiftr(); } 81 | #endif 82 | SECTION("datatype = int32_t") { test_Reg_rshiftr(); } 83 | #if defined(MIPP_BW) 84 | #if !defined(MIPP_AVX) 85 | SECTION("datatype = int16_t") { test_Reg_rshiftr(); } 86 | #if !defined(MIPP_AVX512) 87 | SECTION("datatype = int8_t") { test_Reg_rshiftr(); } 88 | #endif 89 | #endif 90 | #endif 91 | #endif 92 | } 93 | #endif 94 | -------------------------------------------------------------------------------- /tests/src/bitwise_operations/xorb.cpp: -------------------------------------------------------------------------------- 1 | /* NOTE(review): the include targets and the template argument lists appear to be missing from this listing - confirm against the original file. */ #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /* Bitwise XOR through the low-level API: two arrays filled with 0,1,2,... are shuffled, loaded with mipp::load and combined with mipp::xorb; every lane read with mipp::get must equal inputs1[i] ^ inputs2[i]. */ template 10 | void test_reg_xorb() 11 | { 12 | 13 | T inputs1[mipp::N()], inputs2[mipp::N()]; 14 | std::iota(inputs1,
inputs1 + mipp::N(), (T)0); 15 | std::iota(inputs2, inputs2 + mipp::N(), (T)0); 16 | 17 | std::mt19937 g; 18 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 19 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 20 | 21 | mipp::reg r1 = mipp::load(inputs1); 22 | mipp::reg r2 = mipp::load(inputs2); 23 | mipp::reg r3 = mipp::xorb(r1, r2); 24 | 25 | for (auto i = 0; i < mipp::N(); i++) 26 | { 27 | T res = inputs1[i] ^ inputs2[i]; 28 | REQUIRE(mipp::get(r3, i) == res); 29 | } 30 | } 31 | 32 | /* Catch2 registration for the low-level XOR test; compiled only when neither MIPP_NO nor MIPP_SVE_LS is defined. */ #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 33 | TEST_CASE("Binary xor - mipp::reg", "[mipp::xorb]") 34 | { 35 | #if defined(MIPP_64BIT) 36 | SECTION("datatype = int64_t") { test_reg_xorb(); } 37 | #endif 38 | SECTION("datatype = int32_t") { test_reg_xorb(); } 39 | #if defined(MIPP_BW) 40 | SECTION("datatype = int16_t") { test_reg_xorb(); } 41 | SECTION("datatype = int8_t") { test_reg_xorb(); } 42 | #endif 43 | } 44 | #endif 45 | 46 | /* Bitwise XOR through the mipp::Reg wrapper: the two shuffled 0,1,2,... arrays are assigned to Reg objects and combined with operator^; r3[i] must equal inputs1[i] ^ inputs2[i]. */ template 47 | void test_Reg_xorb() 48 | { 49 | T inputs1[mipp::N()], inputs2[mipp::N()]; 50 | std::iota(inputs1, inputs1 + mipp::N(), (T)0); 51 | std::iota(inputs2, inputs2 + mipp::N(), (T)0); 52 | 53 | std::mt19937 g; 54 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 55 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 56 | 57 | mipp::Reg r1 = inputs1; 58 | mipp::Reg r2 = inputs2; 59 | mipp::Reg r3 = r1 ^ r2; 60 | 61 | for (auto i = 0; i < mipp::N(); i++) 62 | { 63 | T res = inputs1[i] ^ inputs2[i]; 64 | REQUIRE(r3[i] == res); 65 | } 66 | } 67 | 68 | /* Catch2 registration for the Reg-wrapper XOR test; compiled out only when MIPP_SVE_LS is defined. */ #if !defined(MIPP_SVE_LS) 69 | TEST_CASE("Binary xor - mipp::Reg", "[mipp::xorb]") 70 | { 71 | #if defined(MIPP_64BIT) 72 | SECTION("datatype = int64_t") { test_Reg_xorb(); } 73 | #endif 74 | SECTION("datatype = int32_t") { test_Reg_xorb(); } 75 | #if defined(MIPP_BW) 76 | SECTION("datatype = int16_t") { test_Reg_xorb(); } 77 | SECTION("datatype = int8_t") { test_Reg_xorb(); } 78 | #endif 79 | } 80 | #endif 81 | 82 | /* Mask XOR through the low-level API, repeated 100 times: two random bool arrays are converted with mipp::set, combined with mipp::xorb and expanded with mipp::toreg. Lanes where inputs1[i] ^ inputs2[i] is true must be non-zero; the other lanes must compare equal to (T)res, i.e. zero. */ template 83 | void test_msk_xorb() 84 | { 85 | constexpr int N = mipp::N(); 86 | bool
inputs1[N], inputs2[N]; 87 | std::mt19937 g; 88 | std::uniform_int_distribution dis(0, 1); 89 | 90 | for (auto t = 0; t < 100; t++) 91 | { 92 | for (auto i = 0; i < N; i++) 93 | { 94 | inputs1[i] = dis(g) ? true : false; 95 | inputs2[i] = dis(g) ? true : false; 96 | } 97 | 98 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 99 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 100 | 101 | mipp::msk m1 = mipp::set(inputs1); 102 | mipp::msk m2 = mipp::set(inputs2); 103 | mipp::msk m3 = mipp::xorb(m1, m2); 104 | 105 | mipp::reg r = mipp::toreg(m3); 106 | 107 | for (auto i = 0; i < N; i++) 108 | { 109 | bool res = inputs1[i] ^ inputs2[i]; 110 | 111 | if (res) 112 | REQUIRE(mipp::get(r, i) != (T)0); 113 | else 114 | REQUIRE(mipp::get(r, i) == (T)res); 115 | } 116 | } 117 | } 118 | 119 | /* Catch2 registration for the low-level mask XOR test; compiled only when neither MIPP_NO nor MIPP_SVE_LS is defined. */ #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 120 | TEST_CASE("Binary xor - mipp::msk", "[mipp::xorb]") 121 | { 122 | #if defined(MIPP_64BIT) 123 | SECTION("datatype = int64_t") { test_msk_xorb(); } 124 | #endif 125 | SECTION("datatype = int32_t") { test_msk_xorb(); } 126 | #if defined(MIPP_BW) 127 | SECTION("datatype = int16_t") { test_msk_xorb(); } 128 | SECTION("datatype = int8_t") { test_msk_xorb(); } 129 | #endif 130 | } 131 | #endif 132 | 133 | /* Mask XOR through the mipp::Msk wrapper, repeated 100 times: random bool inputs are assigned to two Msk objects, combined with operator^, and each m3[i] must equal inputs1[i] ^ inputs2[i]. */ template 134 | void test_Msk_xorb() 135 | { 136 | constexpr int N = mipp::N(); 137 | bool inputs1[N], inputs2[N]; 138 | std::mt19937 g; 139 | std::uniform_int_distribution dis(0, 1); 140 | 141 | for (auto t = 0; t < 100; t++) 142 | { 143 | for (auto i = 0; i < N; i++) 144 | { 145 | inputs1[i] = dis(g) ? true : false; 146 | inputs2[i] = dis(g) ?
true : false; 147 | } 148 | 149 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 150 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 151 | 152 | mipp::Msk m1 = inputs1; 153 | mipp::Msk m2 = inputs2; 154 | mipp::Msk m3 = m1 ^ m2; 155 | 156 | for (auto i = 0; i < N; i++) 157 | { 158 | bool res = inputs1[i] ^ inputs2[i]; 159 | REQUIRE(m3[i] == res); 160 | } 161 | } 162 | } 163 | 164 | /* Catch2 registration for the Msk-wrapper XOR test; compiled out only when MIPP_SVE_LS is defined. */ #if !defined(MIPP_SVE_LS) 165 | TEST_CASE("Binary xor - mipp::Msk", "[mipp::xorb]") 166 | { 167 | #if defined(MIPP_64BIT) 168 | SECTION("datatype = int64_t") { test_Msk_xorb(); } 169 | #endif 170 | SECTION("datatype = int32_t") { test_Msk_xorb(); } 171 | #if defined(MIPP_BW) 172 | SECTION("datatype = int16_t") { test_Msk_xorb(); } 173 | SECTION("datatype = int8_t") { test_Msk_xorb(); } 174 | #endif 175 | } 176 | #endif 177 | 178 | -------------------------------------------------------------------------------- /tests/src/logical_comparisons/cmpeq.cpp: -------------------------------------------------------------------------------- 1 | /* NOTE(review): the include targets and the template argument lists appear to be missing from this listing - confirm against the original file. */ #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /* Equality comparison through the low-level API: two arrays are filled with 1,2,3,... and, in each of 100 rounds, reshuffled with one default-constructed std::mt19937, loaded, compared with mipp::cmpeq and the resulting mask expanded with mipp::toreg. A lane must be non-zero exactly when inputs1[i] == inputs2[i], and zero otherwise. */ template 10 | void test_reg_cmpeq() 11 | { 12 | T inputs1[mipp::N()], inputs2[mipp::N()]; 13 | std::iota(inputs1, inputs1 + mipp::N(), (T)1); 14 | std::iota(inputs2, inputs2 + mipp::N(), (T)1); 15 | 16 | std::mt19937 g; 17 | for (auto t = 0; t < 100; t++) 18 | { 19 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 20 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 21 | 22 | mipp::reg r1 = mipp::load(inputs1); 23 | mipp::reg r2 = mipp::load(inputs2); 24 | mipp::msk m = mipp::cmpeq(r1, r2); 25 | 26 | mipp::reg r3 = mipp::toreg()>(m); 27 | 28 | for (auto i = 0; i < mipp::N(); i++) 29 | { 30 | if (inputs1[i] == inputs2[i]) 31 | REQUIRE(mipp::get(r3, i) != (T)0); 32 | else 33 | REQUIRE(mipp::get(r3, i) == (T)0); 34 | } 35 | } 36 | } 37 | 38 | /* Catch2 registration for the low-level equality test; guarded by MIPP_NO only. The 32/64-bit integer sections are compiled unless MIPP_AVX is defined without MIPP_AVX2 under an Intel compiler (__INTEL_COMPILER, __ICL or __ICC); the int64_t section additionally needs MIPP_64BIT and, for MIPP_SSE, MIPP_INSTR_VERSION >= 41. */ #ifndef MIPP_NO 39 | TEST_CASE("Compare equal - mipp::reg", "[mipp::cmpeq]") 40 | { 41 | #if defined(MIPP_64BIT) 42 |
SECTION("datatype = double") { test_reg_cmpeq(); } 43 | #endif 44 | SECTION("datatype = float") { test_reg_cmpeq(); } 45 | 46 | #if !defined(MIPP_AVX) || defined(MIPP_AVX2) || (defined(MIPP_AVX) && !defined(__INTEL_COMPILER) && !defined(__ICL) && !defined(__ICC)) 47 | #if defined(MIPP_64BIT) 48 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 49 | SECTION("datatype = int64_t") { test_reg_cmpeq(); } 50 | #endif 51 | #endif 52 | SECTION("datatype = int32_t") { test_reg_cmpeq(); } 53 | #endif 54 | #if defined(MIPP_BW) 55 | SECTION("datatype = int16_t") { test_reg_cmpeq(); } 56 | SECTION("datatype = int8_t") { test_reg_cmpeq(); } 57 | #endif 58 | } 59 | #endif 60 | 61 | /* Equality comparison through the mipp::Reg wrapper: in each of 100 rounds the two 1,2,3,... arrays are reshuffled, assigned to Reg objects and compared with operator==; each mask lane m[i] must equal the scalar result (inputs1[i] == inputs2[i]). */ template 62 | void test_Reg_cmpeq() 63 | { 64 | T inputs1[mipp::N()], inputs2[mipp::N()]; 65 | std::iota(inputs1, inputs1 + mipp::N(), (T)1); 66 | std::iota(inputs2, inputs2 + mipp::N(), (T)1); 67 | 68 | std::mt19937 g; 69 | for (auto t = 0; t < 100; t++) 70 | { 71 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 72 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 73 | 74 | mipp::Reg r1 = inputs1; 75 | mipp::Reg r2 = inputs2; 76 | mipp::Msk()> m = r1 == r2; 77 | 78 | for (auto i = 0; i < mipp::N(); i++) 79 | REQUIRE(m[i] == (inputs1[i] == inputs2[i])); 80 | } 81 | } 82 | 83 | /* Catch2 registration for the Reg-wrapper equality test. The case itself is not wrapped in any MIPP_NO / MIPP_SVE_LS guard; only the per-type sections are conditional. */ TEST_CASE("Compare equal - mipp::Reg", "[mipp::cmpeq]") 84 | { 85 | #if defined(MIPP_64BIT) 86 | SECTION("datatype = double") { test_Reg_cmpeq(); } 87 | #endif 88 | SECTION("datatype = float") { test_Reg_cmpeq(); } 89 | 90 | #if !defined(MIPP_AVX) || defined(MIPP_AVX2) || (defined(MIPP_AVX) && !defined(__INTEL_COMPILER) && !defined(__ICL) && !defined(__ICC)) 91 | #if defined(MIPP_64BIT) 92 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 93 | SECTION("datatype = int64_t") { test_Reg_cmpeq(); } 94 | #endif 95 | #endif 96 | SECTION("datatype = int32_t") { test_Reg_cmpeq(); } 97 | #endif 98 | #if defined(MIPP_BW) 99 | SECTION("datatype = int16_t") { test_Reg_cmpeq(); } 100 |
SECTION("datatype = int8_t") { test_Reg_cmpeq(); } 101 | #endif 102 | } 103 | -------------------------------------------------------------------------------- /tests/src/logical_comparisons/cmpge.cpp: -------------------------------------------------------------------------------- 1 | /* NOTE(review): the include targets and the template argument lists appear to be missing from this listing - confirm against the original file. */ #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /* Greater-or-equal comparison through the low-level API: in each of 100 rounds the two 1,2,3,... arrays are reshuffled, loaded, compared with mipp::cmpge and the mask expanded with mipp::toreg. A lane must be non-zero exactly when inputs1[i] >= inputs2[i], and zero otherwise. */ template 10 | void test_reg_cmpge() 11 | { 12 | T inputs1[mipp::N()], inputs2[mipp::N()]; 13 | std::iota(inputs1, inputs1 + mipp::N(), (T)1); 14 | std::iota(inputs2, inputs2 + mipp::N(), (T)1); 15 | 16 | std::mt19937 g; 17 | for (auto t = 0; t < 100; t++) 18 | { 19 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 20 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 21 | 22 | mipp::reg r1 = mipp::load(inputs1); 23 | mipp::reg r2 = mipp::load(inputs2); 24 | mipp::msk m = mipp::cmpge(r1, r2); 25 | 26 | mipp::reg r3 = mipp::toreg()>(m); 27 | 28 | for (auto i = 0; i < mipp::N(); i++) 29 | { 30 | if (inputs1[i] >= inputs2[i]) 31 | REQUIRE(mipp::get(r3, i) != (T)0); 32 | else 33 | REQUIRE(mipp::get(r3, i) == (T)0); 34 | } 35 | } 36 | } 37 | 38 | /* Catch2 registration for the low-level greater-or-equal test; compiled only when neither MIPP_NO nor MIPP_SVE_LS is defined. The 32-bit integer sections are compiled out for MIPP_AVX with MIPP_INSTR_VERSION < 2; the 64-bit integer sections additionally need MIPP_SSE to be undefined and MIPP_64BIT to be defined; the 16/8-bit sections need MIPP_BW. */ #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 39 | TEST_CASE("Compare greater or equal - mipp::reg", "[mipp::cmpge]") 40 | { 41 | #if defined(MIPP_64BIT) 42 | SECTION("datatype = double") { test_reg_cmpge(); } 43 | #endif 44 | SECTION("datatype = float") { test_reg_cmpge(); } 45 | 46 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 47 | #if !defined(MIPP_SSE) 48 | #if defined(MIPP_64BIT) 49 | SECTION("datatype = int64_t") { test_reg_cmpge(); } 50 | SECTION("datatype = uint64_t") { test_reg_cmpge(); } 51 | #endif 52 | #endif 53 | SECTION("datatype = int32_t") { test_reg_cmpge(); } 54 | SECTION("datatype = uint32_t") { test_reg_cmpge(); } 55 | #endif 56 | #if defined(MIPP_BW) 57 | SECTION("datatype = int16_t") { test_reg_cmpge(); } 58 | SECTION("datatype = uint16_t") { test_reg_cmpge(); } 59 | SECTION("datatype = int8_t") {
test_reg_cmpge(); } 60 | SECTION("datatype = uint8_t") { test_reg_cmpge(); } 61 | #endif 62 | } 63 | #endif 64 | 65 | /* Greater-or-equal comparison through the mipp::Reg wrapper: in each of 100 rounds the two 1,2,3,... arrays are reshuffled, assigned to Reg objects and compared with operator>=; each mask lane m[i] must equal (inputs1[i] >= inputs2[i]). */ template 66 | void test_Reg_cmpge() 67 | { 68 | T inputs1[mipp::N()], inputs2[mipp::N()]; 69 | std::iota(inputs1, inputs1 + mipp::N(), (T)1); 70 | std::iota(inputs2, inputs2 + mipp::N(), (T)1); 71 | 72 | std::mt19937 g; 73 | for (auto t = 0; t < 100; t++) 74 | { 75 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 76 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 77 | 78 | mipp::Reg r1 = inputs1; 79 | mipp::Reg r2 = inputs2; 80 | mipp::Msk()> m = r1 >= r2; 81 | 82 | for (auto i = 0; i < mipp::N(); i++) 83 | REQUIRE(m[i] == (inputs1[i] >= inputs2[i])); 84 | } 85 | } 86 | 87 | /* Catch2 registration for the Reg-wrapper greater-or-equal test; compiled out only when MIPP_SVE_LS is defined. Section guards: 32-bit integers are skipped for MIPP_AVX with MIPP_INSTR_VERSION < 2, 64-bit integers additionally need no MIPP_SSE plus MIPP_64BIT, 16/8-bit need MIPP_BW. */ #if !defined(MIPP_SVE_LS) 88 | TEST_CASE("Compare greater or equal - mipp::Reg", "[mipp::cmpge]") 89 | { 90 | #if defined(MIPP_64BIT) 91 | SECTION("datatype = double") { test_Reg_cmpge(); } 92 | #endif 93 | SECTION("datatype = float") { test_Reg_cmpge(); } 94 | 95 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 96 | #if !defined(MIPP_SSE) 97 | #if defined(MIPP_64BIT) 98 | SECTION("datatype = int64_t") { test_Reg_cmpge(); } 99 | SECTION("datatype = uint64_t") { test_Reg_cmpge(); } 100 | #endif 101 | #endif 102 | SECTION("datatype = int32_t") { test_Reg_cmpge(); } 103 | SECTION("datatype = uint32_t") { test_Reg_cmpge(); } 104 | #endif 105 | #if defined(MIPP_BW) 106 | SECTION("datatype = int16_t") { test_Reg_cmpge(); } 107 | SECTION("datatype = uint16_t") { test_Reg_cmpge(); } 108 | SECTION("datatype = int8_t") { test_Reg_cmpge(); } 109 | SECTION("datatype = uint8_t") { test_Reg_cmpge(); } 110 | #endif 111 | } 112 | #endif 113 | -------------------------------------------------------------------------------- /tests/src/logical_comparisons/cmpgt.cpp: -------------------------------------------------------------------------------- 1 | /* NOTE(review): the include targets and the template argument lists appear to be missing from this listing - confirm against the original file. */ #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /* Greater-than comparison through the low-level API: in each of 100 rounds the two 1,2,3,... arrays are reshuffled, loaded, compared with mipp::cmpgt and the mask expanded with mipp::toreg. A lane must be non-zero exactly when inputs1[i] > inputs2[i], and zero otherwise. */ template 10 | void
test_reg_cmpgt() 11 | { 12 | T inputs1[mipp::N()], inputs2[mipp::N()]; 13 | std::iota(inputs1, inputs1 + mipp::N(), (T)1); 14 | std::iota(inputs2, inputs2 + mipp::N(), (T)1); 15 | 16 | std::mt19937 g; 17 | for (auto t = 0; t < 100; t++) 18 | { 19 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 20 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 21 | 22 | mipp::reg r1 = mipp::load(inputs1); 23 | mipp::reg r2 = mipp::load(inputs2); 24 | mipp::msk m = mipp::cmpgt(r1, r2); 25 | 26 | mipp::reg r3 = mipp::toreg()>(m); 27 | 28 | for (auto i = 0; i < mipp::N(); i++) 29 | { 30 | if (inputs1[i] > inputs2[i]) 31 | REQUIRE(mipp::get(r3, i) != (T)0); 32 | else 33 | REQUIRE(mipp::get(r3, i) == (T)0); 34 | } 35 | } 36 | } 37 | 38 | /* Catch2 registration for the low-level greater-than test; compiled only when neither MIPP_NO nor MIPP_SVE_LS is defined. Section guards: 32-bit integers are skipped for MIPP_AVX with MIPP_INSTR_VERSION < 2, 64-bit integers additionally need no MIPP_SSE plus MIPP_64BIT, 16/8-bit need MIPP_BW. */ #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 39 | TEST_CASE("Compare greater than - mipp::reg", "[mipp::cmpgt]") 40 | { 41 | #if defined(MIPP_64BIT) 42 | SECTION("datatype = double") { test_reg_cmpgt(); } 43 | #endif 44 | SECTION("datatype = float") { test_reg_cmpgt(); } 45 | 46 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 47 | #if !defined(MIPP_SSE) 48 | #if defined(MIPP_64BIT) 49 | SECTION("datatype = int64_t") { test_reg_cmpgt(); } 50 | SECTION("datatype = uint64_t") { test_reg_cmpgt(); } 51 | #endif 52 | #endif 53 | SECTION("datatype = int32_t") { test_reg_cmpgt(); } 54 | SECTION("datatype = uint32_t") { test_reg_cmpgt(); } 55 | #endif 56 | #if defined(MIPP_BW) 57 | SECTION("datatype = int16_t") { test_reg_cmpgt(); } 58 | SECTION("datatype = uint16_t") { test_reg_cmpgt(); } 59 | SECTION("datatype = int8_t") { test_reg_cmpgt(); } 60 | SECTION("datatype = uint8_t") { test_reg_cmpgt(); } 61 | #endif 62 | } 63 | #endif 64 | 65 | /* Greater-than comparison through the mipp::Reg wrapper: in each of 100 rounds the two 1,2,3,... arrays are reshuffled, assigned to Reg objects and compared with operator>; each mask lane m[i] must equal (inputs1[i] > inputs2[i]). */ template 66 | void test_Reg_cmpgt() 67 | { 68 | T inputs1[mipp::N()], inputs2[mipp::N()]; 69 | std::iota(inputs1, inputs1 + mipp::N(), (T)1); 70 | std::iota(inputs2, inputs2 + mipp::N(), (T)1); 71 | 72 | std::mt19937 g; 73 | for (auto t = 0; t < 100; t++) 74 | { 75 | std::shuffle(inputs1,
inputs1 + mipp::N(), g); 76 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 77 | 78 | mipp::Reg r1 = inputs1; 79 | mipp::Reg r2 = inputs2; 80 | mipp::Msk()> m = r1 > r2; 81 | 82 | for (auto i = 0; i < mipp::N(); i++) 83 | REQUIRE(m[i] == (inputs1[i] > inputs2[i])); 84 | } 85 | } 86 | 87 | /* Catch2 registration for the Reg-wrapper greater-than test; compiled out only when MIPP_SVE_LS is defined. Section guards: 32-bit integers are skipped for MIPP_AVX with MIPP_INSTR_VERSION < 2, 64-bit integers additionally need no MIPP_SSE plus MIPP_64BIT, 16/8-bit need MIPP_BW. */ #if !defined(MIPP_SVE_LS) 88 | TEST_CASE("Compare greater than - mipp::Reg", "[mipp::cmpgt]") 89 | { 90 | #if defined(MIPP_64BIT) 91 | SECTION("datatype = double") { test_Reg_cmpgt(); } 92 | #endif 93 | SECTION("datatype = float") { test_Reg_cmpgt(); } 94 | 95 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 96 | #if !defined(MIPP_SSE) 97 | #if defined(MIPP_64BIT) 98 | SECTION("datatype = int64_t") { test_Reg_cmpgt(); } 99 | SECTION("datatype = uint64_t") { test_Reg_cmpgt(); } 100 | #endif 101 | #endif 102 | SECTION("datatype = int32_t") { test_Reg_cmpgt(); } 103 | SECTION("datatype = uint32_t") { test_Reg_cmpgt(); } 104 | #endif 105 | #if defined(MIPP_BW) 106 | SECTION("datatype = int16_t") { test_Reg_cmpgt(); } 107 | SECTION("datatype = uint16_t") { test_Reg_cmpgt(); } 108 | SECTION("datatype = int8_t") { test_Reg_cmpgt(); } 109 | SECTION("datatype = uint8_t") { test_Reg_cmpgt(); } 110 | #endif 111 | } 112 | #endif 113 | -------------------------------------------------------------------------------- /tests/src/logical_comparisons/cmple.cpp: -------------------------------------------------------------------------------- 1 | /* NOTE(review): the include targets and the template argument lists appear to be missing from this listing - confirm against the original file. */ #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /* Lower-or-equal comparison through the low-level API: in each of 100 rounds the two 1,2,3,... arrays are reshuffled, loaded, compared with mipp::cmple and the mask expanded with mipp::toreg. A lane must be non-zero exactly when inputs1[i] <= inputs2[i], and zero otherwise. */ template 10 | void test_reg_cmple() 11 | { 12 | T inputs1[mipp::N()], inputs2[mipp::N()]; 13 | std::iota(inputs1, inputs1 + mipp::N(), (T)1); 14 | std::iota(inputs2, inputs2 + mipp::N(), (T)1); 15 | 16 | std::mt19937 g; 17 | for (auto t = 0; t < 100; t++) 18 | { 19 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 20 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 21 | 22 | mipp::reg r1 = mipp::load(inputs1); 23 | mipp::reg r2 =
mipp::load(inputs2); 24 | mipp::msk m = mipp::cmple(r1, r2); 25 | 26 | mipp::reg r3 = mipp::toreg()>(m); 27 | 28 | for (auto i = 0; i < mipp::N(); i++) 29 | { 30 | if (inputs1[i] <= inputs2[i]) 31 | REQUIRE(mipp::get(r3, i) != (T)0); 32 | else 33 | REQUIRE(mipp::get(r3, i) == (T)0); 34 | } 35 | } 36 | } 37 | 38 | /* Catch2 registration for the low-level lower-or-equal test; compiled only when neither MIPP_NO nor MIPP_SVE_LS is defined. Section guards: 32-bit integers are skipped for MIPP_AVX with MIPP_INSTR_VERSION < 2, 64-bit integers additionally need no MIPP_SSE plus MIPP_64BIT, 16/8-bit need MIPP_BW. */ #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 39 | TEST_CASE("Compare lower or equal - mipp::reg", "[mipp::cmple]") 40 | { 41 | #if defined(MIPP_64BIT) 42 | SECTION("datatype = double") { test_reg_cmple(); } 43 | #endif 44 | SECTION("datatype = float") { test_reg_cmple(); } 45 | 46 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 47 | #if !defined(MIPP_SSE) 48 | #if defined(MIPP_64BIT) 49 | SECTION("datatype = int64_t") { test_reg_cmple(); } 50 | SECTION("datatype = uint64_t") { test_reg_cmple(); } 51 | #endif 52 | #endif 53 | SECTION("datatype = int32_t") { test_reg_cmple(); } 54 | SECTION("datatype = uint32_t") { test_reg_cmple(); } 55 | #endif 56 | #if defined(MIPP_BW) 57 | SECTION("datatype = int16_t") { test_reg_cmple(); } 58 | SECTION("datatype = uint16_t") { test_reg_cmple(); } 59 | SECTION("datatype = int8_t") { test_reg_cmple(); } 60 | SECTION("datatype = uint8_t") { test_reg_cmple(); } 61 | #endif 62 | } 63 | #endif 64 | 65 | /* Lower-or-equal comparison through the mipp::Reg wrapper: in each of 100 rounds the two 1,2,3,... arrays are reshuffled, assigned to Reg objects and compared with operator<=; each mask lane m[i] must equal (inputs1[i] <= inputs2[i]). */ template 66 | void test_Reg_cmple() 67 | { 68 | T inputs1[mipp::N()], inputs2[mipp::N()]; 69 | std::iota(inputs1, inputs1 + mipp::N(), (T)1); 70 | std::iota(inputs2, inputs2 + mipp::N(), (T)1); 71 | 72 | std::mt19937 g; 73 | for (auto t = 0; t < 100; t++) 74 | { 75 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 76 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 77 | 78 | mipp::Reg r1 = inputs1; 79 | mipp::Reg r2 = inputs2; 80 | mipp::Msk()> m = r1 <= r2; 81 | 82 | for (auto i = 0; i < mipp::N(); i++) 83 | REQUIRE(m[i] == (inputs1[i] <= inputs2[i])); 84 | } 85 | } 86 | 87 | /* Catch2 registration for the Reg-wrapper lower-or-equal test; compiled out only when MIPP_SVE_LS is defined. */ #if !defined(MIPP_SVE_LS) 88 | TEST_CASE("Compare lower or equal - mipp::Reg", "[mipp::cmple]") 89 | { 90 | #if
defined(MIPP_64BIT) 91 | SECTION("datatype = double") { test_Reg_cmple(); } 92 | #endif 93 | SECTION("datatype = float") { test_Reg_cmple(); } 94 | 95 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 96 | #if !defined(MIPP_SSE) 97 | #if defined(MIPP_64BIT) 98 | SECTION("datatype = int64_t") { test_Reg_cmple(); } 99 | SECTION("datatype = uint64_t") { test_Reg_cmple(); } 100 | #endif 101 | #endif 102 | SECTION("datatype = int32_t") { test_Reg_cmple(); } 103 | SECTION("datatype = uint32_t") { test_Reg_cmple(); } 104 | #endif 105 | #if defined(MIPP_BW) 106 | SECTION("datatype = int16_t") { test_Reg_cmple(); } 107 | SECTION("datatype = uint16_t") { test_Reg_cmple(); } 108 | SECTION("datatype = int8_t") { test_Reg_cmple(); } 109 | SECTION("datatype = uint8_t") { test_Reg_cmple(); } 110 | #endif 111 | } 112 | #endif 113 | -------------------------------------------------------------------------------- /tests/src/logical_comparisons/cmplt.cpp: -------------------------------------------------------------------------------- 1 | /* NOTE(review): the include targets and the template argument lists appear to be missing from this listing - confirm against the original file. */ #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /* Lower-than comparison through the low-level API: in each of 100 rounds the two 1,2,3,... arrays are reshuffled, loaded, compared with mipp::cmplt and the mask expanded with mipp::toreg. A lane must be non-zero exactly when inputs1[i] < inputs2[i], and zero otherwise. */ template 10 | void test_reg_cmplt() 11 | { 12 | T inputs1[mipp::N()], inputs2[mipp::N()]; 13 | std::iota(inputs1, inputs1 + mipp::N(), (T)1); 14 | std::iota(inputs2, inputs2 + mipp::N(), (T)1); 15 | 16 | std::mt19937 g; 17 | for (auto t = 0; t < 100; t++) 18 | { 19 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 20 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 21 | 22 | mipp::reg r1 = mipp::load(inputs1); 23 | mipp::reg r2 = mipp::load(inputs2); 24 | mipp::msk m = mipp::cmplt(r1, r2); 25 | 26 | mipp::reg r3 = mipp::toreg()>(m); 27 | 28 | for (auto i = 0; i < mipp::N(); i++) 29 | { 30 | if (inputs1[i] < inputs2[i]) 31 | REQUIRE(mipp::get(r3, i) != (T)0); 32 | else 33 | REQUIRE(mipp::get(r3, i) == (T)0); 34 | } 35 | } 36 | } 37 | 38 | /* Catch2 registration for the low-level lower-than test; compiled only when neither MIPP_NO nor MIPP_SVE_LS is defined. Section guards: 32-bit integers are skipped for MIPP_AVX with MIPP_INSTR_VERSION < 2, 64-bit integers additionally need no MIPP_SSE plus MIPP_64BIT, 16/8-bit need MIPP_BW. */ #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 39 | TEST_CASE("Compare lower than - mipp::reg",
"[mipp::cmplt]") 40 | { 41 | #if defined(MIPP_64BIT) 42 | SECTION("datatype = double") { test_reg_cmplt(); } 43 | #endif 44 | SECTION("datatype = float") { test_reg_cmplt(); } 45 | 46 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 47 | #if !defined(MIPP_SSE) 48 | #if defined(MIPP_64BIT) 49 | SECTION("datatype = int64_t") { test_reg_cmplt(); } 50 | SECTION("datatype = uint64_t") { test_reg_cmplt(); } 51 | #endif 52 | #endif 53 | SECTION("datatype = int32_t") { test_reg_cmplt(); } 54 | SECTION("datatype = uint32_t") { test_reg_cmplt(); } 55 | #endif 56 | #if defined(MIPP_BW) 57 | SECTION("datatype = int16_t") { test_reg_cmplt(); } 58 | SECTION("datatype = uint16_t") { test_reg_cmplt(); } 59 | SECTION("datatype = int8_t") { test_reg_cmplt(); } 60 | SECTION("datatype = uint8_t") { test_reg_cmplt(); } 61 | #endif 62 | } 63 | #endif 64 | 65 | /* Lower-than comparison through the mipp::Reg wrapper: in each of 100 rounds the two 1,2,3,... arrays are reshuffled, assigned to Reg objects and compared with operator<; each mask lane m[i] must equal (inputs1[i] < inputs2[i]). */ template 66 | void test_Reg_cmplt() 67 | { 68 | T inputs1[mipp::N()], inputs2[mipp::N()]; 69 | std::iota(inputs1, inputs1 + mipp::N(), (T)1); 70 | std::iota(inputs2, inputs2 + mipp::N(), (T)1); 71 | 72 | std::mt19937 g; 73 | for (auto t = 0; t < 100; t++) 74 | { 75 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 76 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 77 | 78 | mipp::Reg r1 = inputs1; 79 | mipp::Reg r2 = inputs2; 80 | mipp::Msk()> m = r1 < r2; 81 | 82 | for (auto i = 0; i < mipp::N(); i++) 83 | REQUIRE(m[i] == (inputs1[i] < inputs2[i])); 84 | } 85 | } 86 | 87 | /* Catch2 registration for the Reg-wrapper lower-than test; compiled out only when MIPP_SVE_LS is defined. Section guards: 32-bit integers are skipped for MIPP_AVX with MIPP_INSTR_VERSION < 2, 64-bit integers additionally need no MIPP_SSE plus MIPP_64BIT, 16/8-bit need MIPP_BW. */ #if !defined(MIPP_SVE_LS) 88 | TEST_CASE("Compare lower than - mipp::Reg", "[mipp::cmplt]") 89 | { 90 | #if defined(MIPP_64BIT) 91 | SECTION("datatype = double") { test_Reg_cmplt(); } 92 | #endif 93 | SECTION("datatype = float") { test_Reg_cmplt(); } 94 | 95 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 96 | #if !defined(MIPP_SSE) 97 | #if defined(MIPP_64BIT) 98 | SECTION("datatype = int64_t") { test_Reg_cmplt(); } 99 | SECTION("datatype = uint64_t") { test_Reg_cmplt(); } 100 | #endif 101 | #endif 102 |
SECTION("datatype = int32_t") { test_Reg_cmplt(); } 103 | SECTION("datatype = uint32_t") { test_Reg_cmplt(); } 104 | #endif 105 | #if defined(MIPP_BW) 106 | SECTION("datatype = int16_t") { test_Reg_cmplt(); } 107 | SECTION("datatype = uint16_t") { test_Reg_cmplt(); } 108 | SECTION("datatype = int8_t") { test_Reg_cmplt(); } 109 | SECTION("datatype = uint8_t") { test_Reg_cmplt(); } 110 | #endif 111 | } 112 | #endif 113 | -------------------------------------------------------------------------------- /tests/src/logical_comparisons/cmpneq.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_cmpneq() 11 | { 12 | T inputs1[mipp::N()], inputs2[mipp::N()]; 13 | std::iota(inputs1, inputs1 + mipp::N(), (T)1); 14 | std::iota(inputs2, inputs2 + mipp::N(), (T)1); 15 | 16 | std::mt19937 g; 17 | for (auto t = 0; t < 100; t++) 18 | { 19 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 20 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 21 | 22 | mipp::reg r1 = mipp::load(inputs1); 23 | mipp::reg r2 = mipp::load(inputs2); 24 | mipp::msk m = mipp::cmpneq(r1, r2); 25 | 26 | mipp::reg r3 = mipp::toreg()>(m); 27 | 28 | for (auto i = 0; i < mipp::N(); i++) 29 | { 30 | if (inputs1[i] != inputs2[i]) 31 | REQUIRE(mipp::get(r3, i) != (T)0); 32 | else 33 | REQUIRE(mipp::get(r3, i) == (T)0); 34 | } 35 | } 36 | } 37 | 38 | #ifndef MIPP_NO 39 | TEST_CASE("Compare not equal - mipp::reg", "[mipp::cmpneq]") 40 | { 41 | #if defined(MIPP_64BIT) 42 | SECTION("datatype = double") { test_reg_cmpneq(); } 43 | #endif 44 | SECTION("datatype = float") { test_reg_cmpneq(); } 45 | 46 | #if !defined(MIPP_AVX) || defined(MIPP_AVX2) || (defined(MIPP_AVX) && !defined(__INTEL_COMPILER) && !defined(__ICL) && !defined(__ICC)) 47 | #if defined(MIPP_64BIT) 48 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 49 | SECTION("datatype = 
int64_t") { test_reg_cmpneq(); } 50 | #endif 51 | #endif 52 | SECTION("datatype = int32_t") { test_reg_cmpneq(); } 53 | #endif 54 | #if defined(MIPP_BW) 55 | SECTION("datatype = int16_t") { test_reg_cmpneq(); } 56 | SECTION("datatype = int8_t") { test_reg_cmpneq(); } 57 | #endif 58 | } 59 | #endif 60 | 61 | template 62 | void test_Reg_cmpneq() 63 | { 64 | T inputs1[mipp::N()], inputs2[mipp::N()]; 65 | std::iota(inputs1, inputs1 + mipp::N(), (T)1); 66 | std::iota(inputs2, inputs2 + mipp::N(), (T)1); 67 | 68 | std::mt19937 g; 69 | for (auto t = 0; t < 100; t++) 70 | { 71 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 72 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 73 | 74 | mipp::Reg r1 = inputs1; 75 | mipp::Reg r2 = inputs2; 76 | mipp::Msk()> m = r1 != r2; 77 | 78 | for (auto i = 0; i < mipp::N(); i++) 79 | REQUIRE(m[i] == (inputs1[i] != inputs2[i])); 80 | } 81 | } 82 | 83 | TEST_CASE("Compare not equal - mipp::Reg", "[mipp::cmpneq]") 84 | { 85 | #if defined(MIPP_64BIT) 86 | SECTION("datatype = double") { test_Reg_cmpneq(); } 87 | #endif 88 | SECTION("datatype = float") { test_Reg_cmpneq(); } 89 | 90 | #if !defined(MIPP_AVX) || defined(MIPP_AVX2) || (defined(MIPP_AVX) && !defined(__INTEL_COMPILER) && !defined(__ICL) && !defined(__ICC)) 91 | #if defined(MIPP_64BIT) 92 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 93 | SECTION("datatype = int64_t") { test_Reg_cmpneq(); } 94 | #endif 95 | #endif 96 | SECTION("datatype = int32_t") { test_Reg_cmpneq(); } 97 | #endif 98 | #if defined(MIPP_BW) 99 | SECTION("datatype = int16_t") { test_Reg_cmpneq(); } 100 | SECTION("datatype = int8_t") { test_Reg_cmpneq(); } 101 | #endif 102 | } 103 | -------------------------------------------------------------------------------- /tests/src/main.cpp: -------------------------------------------------------------------------------- 1 | #define CATCH_CONFIG_RUNNER 2 | #include 3 | #include 4 | #include 5 | 6 | int main(int argc, char* argv[]) 7 | { 8 | 
std::cout << "MIPP tests" << std::endl; 9 | std::cout << "----------" << std::endl << std::endl; 10 | 11 | std::cout << "Instr. type: " << mipp::InstructionType << std::endl; 12 | std::cout << "Instr. full type: " << mipp::InstructionFullType << std::endl; 13 | std::cout << "Instr. version: " << mipp::InstructionVersion << std::endl; 14 | std::cout << "Instr. size: " << mipp::RegisterSizeBit << " bits" << std::endl; 15 | std::cout << "Instr. lanes: " << mipp::Lanes << std::endl; 16 | std::cout << "64-bit support: " << (mipp::Support64Bit ? "yes" : "no") << std::endl; 17 | std::cout << "Byte/word support: " << (mipp::SupportByteWord ? "yes" : "no") << std::endl; 18 | auto ext = mipp::InstructionExtensions(); 19 | if (ext.size() > 0) 20 | { 21 | std::cout << "Instr. extensions: {"; 22 | for (auto i = 0; i < (int)ext.size(); i++) 23 | std::cout << ext[i] << (i < ((int)ext.size() -1) ? ", " : ""); 24 | std::cout << "}" << std::endl; 25 | } 26 | std::cout << std::endl; 27 | 28 | int result = Catch::Session().run(argc, argv); 29 | 30 | return result; 31 | } 32 | -------------------------------------------------------------------------------- /tests/src/math_functions/sincos.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_sincos() 11 | { 12 | T inputs1[mipp::N()]; 13 | std::iota(inputs1, inputs1 + mipp::N(), (T)1); 14 | 15 | std::mt19937 g; 16 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 17 | 18 | mipp::reg r1 = mipp::load(inputs1); 19 | mipp::reg r2, r3; 20 | mipp::sincos(r1, r2, r3); 21 | 22 | for (auto i = 0; i < mipp::N(); i++) 23 | { 24 | T res1 = std::sin(inputs1[i]); 25 | T res2 = std::cos(inputs1[i]); 26 | 27 | // REQUIRE(mipp::get(r2, i) == Approx(res1).epsilon(0.001)); 28 | REQUIRE(mipp::get(r2, i) == Approx(res1)); 29 | 30 | // REQUIRE(mipp::get(r3, i) == Approx(res2).epsilon(0.001)); 31 | 
REQUIRE(mipp::get(r3, i) == Approx(res2)); 32 | } 33 | } 34 | 35 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 36 | TEST_CASE("Sine & Cosine - mipp::reg", "[mipp::sincos]") 37 | { 38 | SECTION("datatype = float" ) { test_reg_sincos(); } 39 | #if defined(MIPP_64BIT) && (defined(__INTEL_COMPILER) || defined(__ICL) || defined(__ICC)) 40 | SECTION("datatype = double") { test_reg_sincos(); } 41 | #endif 42 | } 43 | #endif 44 | 45 | template 46 | void test_Reg_sincos() 47 | { 48 | T inputs1[mipp::N()]; 49 | std::iota(inputs1, inputs1 + mipp::N(), (T)1); 50 | 51 | std::mt19937 g; 52 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 53 | 54 | mipp::Reg r1 = inputs1; 55 | mipp::Reg r2, r3; 56 | mipp::sincos(r1, r2, r3); 57 | 58 | for (auto i = 0; i < mipp::N(); i++) 59 | { 60 | T res1 = std::sin(inputs1[i]); 61 | T res2 = std::cos(inputs1[i]); 62 | 63 | // REQUIRE(r2[i] == Approx(res1).epsilon(0.001)); 64 | REQUIRE(r2[i] == Approx(res1)); 65 | 66 | // REQUIRE(r3[i] == Approx(res2).epsilon(0.001)); 67 | REQUIRE(r3[i] == Approx(res2)); 68 | } 69 | } 70 | 71 | #if !defined(MIPP_SVE_LS) 72 | TEST_CASE("Sine & Cosine - mipp::Reg", "[mipp::sincos]") 73 | { 74 | SECTION("datatype = float" ) { test_Reg_sincos(); } 75 | #if defined(MIPP_64BIT) && (defined(__INTEL_COMPILER) || defined(__ICL) || defined(__ICC)) 76 | SECTION("datatype = double") { test_Reg_sincos(); } 77 | #endif 78 | } 79 | #endif 80 | -------------------------------------------------------------------------------- /tests/src/memory_operations/blend.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_blend() 11 | { 12 | constexpr int N = mipp::N(); 13 | T inputs1[N], inputs2[N]; 14 | std::iota(inputs1, inputs1 + N, (T)0); 15 | std::iota(inputs2, inputs2 + N, (T)N); 16 | 17 | bool mask[N]; 18 | std::fill(mask, mask + N/2, true ); 19 | std::fill(mask + N/2, 
mask + N, false); 20 | 21 | std::mt19937 g; 22 | for (auto t = 0; t < 100; t++) 23 | { 24 | std::shuffle(mask, mask + N, g); 25 | 26 | mipp::reg r1 = mipp::load(inputs1); 27 | mipp::reg r2 = mipp::load(inputs2); 28 | mipp::msk m = mipp::set (mask ); 29 | 30 | mipp::reg ri = mipp::blend(r1, r2, m); 31 | 32 | for (auto i = 0; i < N; i++) 33 | REQUIRE(mipp::get(ri, i) == (mask[i] ? inputs1[i] : inputs2[i])); 34 | } 35 | } 36 | 37 | #ifndef MIPP_NO 38 | TEST_CASE("Blend - mipp::reg", "[mipp::blend]") 39 | { 40 | #if defined(MIPP_64BIT) 41 | SECTION("datatype = double") { test_reg_blend(); } 42 | #endif 43 | SECTION("datatype = float") { test_reg_blend(); } 44 | 45 | #if defined(MIPP_64BIT) 46 | SECTION("datatype = int64_t") { test_reg_blend(); } 47 | #endif 48 | SECTION("datatype = int32_t") { test_reg_blend(); } 49 | #if defined(MIPP_BW) 50 | SECTION("datatype = int16_t") { test_reg_blend(); } 51 | SECTION("datatype = int8_t") { test_reg_blend(); } 52 | #endif 53 | } 54 | #endif 55 | 56 | template 57 | void test_Reg_blend() 58 | { 59 | constexpr int N = mipp::N(); 60 | T inputs1[N], inputs2[N]; 61 | std::iota(inputs1, inputs1 + N, (T)0); 62 | std::iota(inputs2, inputs2 + N, (T)N); 63 | 64 | bool mask[N]; 65 | std::fill(mask, mask + N/2, true ); 66 | std::fill(mask + N/2, mask + N, false); 67 | 68 | std::mt19937 g; 69 | for (auto t = 0; t < 100; t++) 70 | { 71 | std::shuffle(mask, mask + N, g); 72 | 73 | mipp::Reg r1 = inputs1; 74 | mipp::Reg r2 = inputs2; 75 | mipp::Msk m = mask; 76 | 77 | mipp::Reg ri = mipp::blend(r1, r2, m); 78 | 79 | for (auto i = 0; i < N; i++) 80 | REQUIRE(ri[i] == (mask[i] ? 
inputs1[i] : inputs2[i])); 81 | } 82 | } 83 | 84 | TEST_CASE("Blend - mipp::Reg", "[mipp::blend]") 85 | { 86 | #if defined(MIPP_64BIT) 87 | SECTION("datatype = double") { test_Reg_blend(); } 88 | #endif 89 | SECTION("datatype = float") { test_Reg_blend(); } 90 | 91 | #if defined(MIPP_64BIT) 92 | SECTION("datatype = int64_t") { test_Reg_blend(); } 93 | #endif 94 | SECTION("datatype = int32_t") { test_Reg_blend(); } 95 | #if defined(MIPP_BW) 96 | SECTION("datatype = int16_t") { test_Reg_blend(); } 97 | SECTION("datatype = int8_t") { test_Reg_blend(); } 98 | #endif 99 | } -------------------------------------------------------------------------------- /tests/src/memory_operations/combine.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_combine() 11 | { 12 | T inputs[mipp::N()]; 13 | 14 | std::iota(inputs, inputs + mipp::N(), (T)0); 15 | 16 | mipp::reg r1 = mipp::load(inputs); 17 | 18 | mipp::reg_2 r1_lo = mipp::low (r1); 19 | mipp::reg_2 r1_hi = mipp::high(r1); 20 | 21 | mipp::reg r2 = mipp::combine(r1_lo, r1_hi); 22 | 23 | for (auto i = 0; i < mipp::N(); i++) 24 | REQUIRE(mipp::get(r2, i) == inputs[i]); 25 | } 26 | 27 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 28 | TEST_CASE("Combine - mipp::reg", "[mipp::combine]") 29 | { 30 | #if defined(MIPP_64BIT) 31 | SECTION("datatype = double") { test_reg_combine(); } 32 | #endif 33 | SECTION("datatype = float") { test_reg_combine(); } 34 | 35 | #if defined(MIPP_64BIT) 36 | SECTION("datatype = int64_t") { test_reg_combine(); } 37 | #endif 38 | SECTION("datatype = int32_t") { test_reg_combine(); } 39 | #if defined(MIPP_BW) 40 | SECTION("datatype = int16_t") { test_reg_combine(); } 41 | SECTION("datatype = int8_t") { test_reg_combine(); } 42 | #endif 43 | } 44 | #endif 45 | 46 | template 47 | void test_Reg_combine() 48 | { 49 | T inputs[mipp::N()]; 50 | 51 | 
std::iota(inputs, inputs + mipp::N(), (T)0); 52 | 53 | mipp::Reg r1 = inputs; 54 | 55 | mipp::Reg_2 r1_lo = r1.low (); 56 | mipp::Reg_2 r1_hi = r1.high(); 57 | 58 | mipp::Reg r2 = mipp::combine(r1_lo, r1_hi); 59 | 60 | for (auto i = 0; i < mipp::N(); i++) 61 | REQUIRE(r2[i] == inputs[i]); 62 | } 63 | 64 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 65 | TEST_CASE("Combine - mipp::Reg", "[mipp::combine]") 66 | { 67 | #if defined(MIPP_64BIT) 68 | SECTION("datatype = double") { test_Reg_combine(); } 69 | #endif 70 | SECTION("datatype = float") { test_Reg_combine(); } 71 | 72 | #if defined(MIPP_64BIT) 73 | SECTION("datatype = int64_t") { test_Reg_combine(); } 74 | #endif 75 | SECTION("datatype = int32_t") { test_Reg_combine(); } 76 | #if defined(MIPP_BW) 77 | SECTION("datatype = int16_t") { test_Reg_combine(); } 78 | SECTION("datatype = int8_t") { test_Reg_combine(); } 79 | #endif 80 | } 81 | #endif 82 | 83 | #include "../static_for.hpp" 84 | 85 | template 86 | struct sub_test_combine_bis 87 | { 88 | template static inline void func(const mipp::Reg &r1, const mipp::Reg &r2, const T *inputs) 89 | { 90 | mipp::Reg r3 = mipp::combine(r1, r2); 91 | 92 | for (auto i = 0; i < mipp::N(); i++) 93 | REQUIRE(r3[i] == inputs[i + LOOP_INDEX]); 94 | } 95 | }; 96 | 97 | template 98 | void test_Reg_combine_bis() 99 | { 100 | T inputs[2 * mipp::N()]; 101 | 102 | std::iota(inputs, inputs + 2 * mipp::N(), (T)0); 103 | 104 | mipp::Reg r1 = inputs; 105 | mipp::Reg r2 = inputs + mipp::N(); 106 | 107 | static_for(), sub_test_combine_bis>(r1, r2, inputs); 108 | } 109 | 110 | #if defined(MIPP_NEON) || defined(MIPP_SSE) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) || defined(MIPP_AVX512F) 111 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 112 | TEST_CASE("Combine (bis) - mipp::Reg", "[mipp::combine_bis]") 113 | { 114 | #if defined(MIPP_64BIT) 115 | SECTION("datatype = double") { test_Reg_combine_bis(); } 116 | #endif 117 | SECTION("datatype = float") { test_Reg_combine_bis(); } 118 | 
119 | #if defined(MIPP_64BIT) 120 | SECTION("datatype = int64_t") { test_Reg_combine_bis(); } 121 | #endif 122 | SECTION("datatype = int32_t") { test_Reg_combine_bis(); } 123 | #if defined(MIPP_BW) 124 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) 125 | SECTION("datatype = int16_t") { test_Reg_combine_bis(); } 126 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI)) 127 | SECTION("datatype = int8_t") { test_Reg_combine_bis(); } 128 | #endif 129 | #endif 130 | #endif 131 | } 132 | #endif 133 | #endif 134 | -------------------------------------------------------------------------------- /tests/src/memory_operations/compress.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | template 11 | void test_reg_compress() 12 | { 13 | constexpr int N = mipp::N(); 14 | 15 | T inputs1[N]; 16 | T expected[N]; 17 | bool mask1[N]; 18 | 19 | std::iota(inputs1, inputs1 + N, (T)1); 20 | mipp::reg r1 = mipp::load(inputs1); 21 | mipp::reg r2 = mipp::set0(); 22 | 23 | std::mt19937 g; 24 | for (auto t = 0; t < 1000; t++) 25 | { 26 | // Generate random mask 27 | int k = 0; 28 | std::fill_n(expected, N, 0); 29 | for (int i = 0; i < N; i++) 30 | { 31 | bool bit = (g() & 1) ? 
false : true; // Generate random bit 32 | mask1[i] = bit; 33 | if (bit) 34 | expected[k++] = i + (T)1; 35 | } 36 | 37 | mipp::msk mask = mipp::set(mask1); 38 | 39 | r2 = mipp::compress(r1, mask); 40 | 41 | for (auto i = 0; i < N; i++) 42 | REQUIRE(mipp::get(r2, i) == expected[i]); 43 | } 44 | } 45 | 46 | #if defined(MIPP_STATIC_LIB) && !defined(MIPP_NO) 47 | TEST_CASE("Compress - mipp::reg", "[mipp::compress]") 48 | { 49 | #if (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) || defined(MIPP_AVX512) || defined(MIPP_NEON) || (defined(MIPP_AVX2) && defined(MIPP_BMI2)) 50 | #if defined(MIPP_64BIT) 51 | SECTION("datatype = double") { test_reg_compress(); } 52 | #endif 53 | SECTION("datatype = float") { test_reg_compress(); } 54 | #if defined(MIPP_64BIT) 55 | SECTION("datatype = int64_t") { test_reg_compress(); } 56 | #endif 57 | SECTION("datatype = int32_t") { test_reg_compress(); } 58 | #if defined(MIPP_BW) && !defined(MIPP_AVX) 59 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI2)) 60 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && defined(MIPP_BMI2)) 61 | SECTION("datatype = int16_t") { test_reg_compress(); } 62 | #endif 63 | SECTION("datatype = int8_t") { test_reg_compress(); } 64 | #endif 65 | #endif 66 | #endif 67 | } 68 | #endif 69 | 70 | template 71 | void test_Reg_compress() 72 | { 73 | constexpr int N = mipp::N(); 74 | 75 | T inputs1[N]; 76 | T expected[N]; 77 | bool mask1[N]; 78 | 79 | std::iota(inputs1, inputs1 + N, (T)1); 80 | mipp::Reg r1 = inputs1; 81 | mipp::Reg r2 = (T)0; 82 | 83 | std::mt19937 g; 84 | for (auto t = 0; t < 1000; t++) 85 | { 86 | // Generate random mask 87 | int k = 0; 88 | std::fill_n(expected, N, 0); 89 | for (int i = 0; i < N; i++) 90 | { 91 | bool bit = (g() & 1) ? 
false : true; // Generate random bit 92 | mask1[i] = bit; 93 | if (bit) 94 | expected[k++] = i + (T)1; 95 | } 96 | 97 | mipp::Msk()> mask = mask1; 98 | 99 | r2 = mipp::compress(r1, mask); 100 | 101 | for (auto i = 0; i < N; i++) 102 | REQUIRE(r2[i] == expected[i]); 103 | } 104 | } 105 | 106 | #if defined(MIPP_STATIC_LIB) && !defined(MIPP_NO) 107 | TEST_CASE("Compress - mipp::Reg", "[mipp::compress]") 108 | { 109 | #if (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) || defined(MIPP_AVX512) || defined(MIPP_NEON) || (defined(MIPP_AVX2) && defined(MIPP_BMI2)) 110 | #if defined(MIPP_64BIT) 111 | SECTION("datatype = double") { test_Reg_compress(); } 112 | #endif 113 | SECTION("datatype = float") { test_Reg_compress(); } 114 | #if defined(MIPP_64BIT) 115 | SECTION("datatype = int64_t") { test_Reg_compress(); } 116 | #endif 117 | SECTION("datatype = int32_t") { test_Reg_compress(); } 118 | #if defined(MIPP_BW) && !defined(MIPP_AVX) 119 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI2)) 120 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && defined(MIPP_BMI2)) 121 | SECTION("datatype = int16_t") { test_Reg_compress(); } 122 | #endif 123 | SECTION("datatype = int8_t") { test_Reg_compress(); } 124 | #endif 125 | #endif 126 | #endif 127 | } 128 | #endif 129 | -------------------------------------------------------------------------------- /tests/src/memory_operations/deinterleave.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_deinterleave() 11 | { 12 | T inputs1[mipp::N()], inputs2[mipp::N()]; 13 | std::mt19937 g; 14 | std::iota (inputs1, inputs1 + mipp::N(), (T)0); 15 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 16 | std::iota (inputs2, inputs2 + mipp::N(), (T)0); 17 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 18 | 19 | T inputs[2*mipp::N()]; 20 | std::copy(inputs1, inputs1 
+ mipp::N(), inputs ); 21 | std::copy(inputs2, inputs2 + mipp::N(), inputs + mipp::N()); 22 | 23 | mipp::reg r1 = mipp::load(inputs1); 24 | mipp::reg r2 = mipp::load(inputs2); 25 | 26 | mipp::regx2 ri = mipp::deinterleave(r1, r2); 27 | 28 | for (auto i = 0; i < mipp::N(); i++) 29 | REQUIRE(mipp::get(ri.val[0], i) == inputs[2 * i +0]); 30 | 31 | for (auto i = 0; i < mipp::N(); i++) 32 | REQUIRE(mipp::get(ri.val[1], i) == inputs[2 * i +1]); 33 | } 34 | 35 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 36 | TEST_CASE("Deinterleave - mipp::reg", "[mipp::deinterleave]") 37 | { 38 | #if defined(MIPP_64BIT) 39 | SECTION("datatype = double") { test_reg_deinterleave(); } 40 | #endif 41 | SECTION("datatype = float") { test_reg_deinterleave(); } 42 | 43 | #if defined(MIPP_64BIT) 44 | SECTION("datatype = int64_t") { test_reg_deinterleave(); } 45 | #endif 46 | SECTION("datatype = int32_t") { test_reg_deinterleave(); } 47 | #if defined(MIPP_BW) 48 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) 49 | SECTION("datatype = int16_t") { test_reg_deinterleave(); } 50 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI)) 51 | SECTION("datatype = int8_t") { test_reg_deinterleave(); } 52 | #endif 53 | #endif 54 | #endif 55 | } 56 | #endif 57 | 58 | template 59 | void test_Reg_deinterleave() 60 | { 61 | T inputs1[mipp::N()], inputs2[mipp::N()]; 62 | std::mt19937 g; 63 | std::iota (inputs1, inputs1 + mipp::N(), (T)0); 64 | std::shuffle(inputs1, inputs1 + mipp::N(), g); 65 | std::iota (inputs2, inputs2 + mipp::N(), (T)0); 66 | std::shuffle(inputs2, inputs2 + mipp::N(), g); 67 | 68 | T inputs[2*mipp::N()]; 69 | std::copy(inputs1, inputs1 + mipp::N(), inputs ); 70 | std::copy(inputs2, inputs2 + mipp::N(), inputs + mipp::N()); 71 | 72 | mipp::Reg r1 = inputs1; 73 | mipp::Reg r2 = inputs2; 74 | 75 | mipp::Regx2 ri = mipp::deinterleave(r1, r2); 76 | 77 | for (auto i = 0; i < mipp::N(); i++) 78 | REQUIRE(ri[0][i] == inputs[2 * i +0]); 79 | 
80 | for (auto i = 0; i < mipp::N(); i++) 81 | REQUIRE(ri[1][i] == inputs[2 * i +1]); 82 | } 83 | 84 | #if !defined(MIPP_SVE_LS) 85 | TEST_CASE("Deinterleave - mipp::Reg", "[mipp::deinterleave]") 86 | { 87 | #if defined(MIPP_64BIT) 88 | SECTION("datatype = double") { test_Reg_deinterleave(); } 89 | #endif 90 | SECTION("datatype = float") { test_Reg_deinterleave(); } 91 | 92 | #if defined(MIPP_64BIT) 93 | SECTION("datatype = int64_t") { test_Reg_deinterleave(); } 94 | #endif 95 | SECTION("datatype = int32_t") { test_Reg_deinterleave(); } 96 | #if defined(MIPP_BW) 97 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) 98 | SECTION("datatype = int16_t") { test_Reg_deinterleave(); } 99 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI)) 100 | SECTION("datatype = int8_t") { test_Reg_deinterleave(); } 101 | #endif 102 | #endif 103 | #endif 104 | } 105 | #endif 106 | -------------------------------------------------------------------------------- /tests/src/memory_operations/gather.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_gather() 11 | { 12 | constexpr int N = mipp::N(); 13 | TD inputs [N*10]; 14 | TI indexes[N*10]; 15 | std::iota(inputs, inputs + N*10, (TD)0); 16 | std::iota(indexes, indexes + N*10, (TI)0); 17 | 18 | std::mt19937 g; 19 | for (auto i = 0; i < 20; i++) 20 | { 21 | std::shuffle(indexes, indexes + N*10, g); 22 | 23 | mipp::reg idx = mipp::loadu(indexes); 24 | mipp::reg r = mipp::gather(inputs, idx); 25 | 26 | for (auto i = 0; i < N; i++) 27 | REQUIRE(+mipp::get(r, i) == +inputs[indexes[i]]); 28 | } 29 | } 30 | 31 | #ifndef MIPP_NO 32 | TEST_CASE("Gather - mipp::reg", "[mipp::gather]") 33 | { 34 | #if defined(MIPP_64BIT) 35 | SECTION("datatype = double") { test_reg_gather(); } 36 | #endif 37 | SECTION("datatype = float") { 
test_reg_gather(); } 38 | #if defined(MIPP_64BIT) 39 | SECTION("datatype = int64_t") { test_reg_gather(); } 40 | #endif 41 | SECTION("datatype = int32_t") { test_reg_gather(); } 42 | #if defined(MIPP_BW) 43 | SECTION("datatype = int16_t") { test_reg_gather(); } 44 | //SECTION("datatype = int8_t") { test_reg_gather(); } 45 | #endif 46 | } 47 | #endif 48 | 49 | template 50 | void test_Reg_gather() 51 | { 52 | constexpr int N = mipp::N(); 53 | TD inputs [N*10]; 54 | TI indexes[N*10]; 55 | std::iota(inputs, inputs + N*10, (TD)0); 56 | std::iota(indexes, indexes + N*10, (TI)0); 57 | 58 | std::mt19937 g; 59 | for (auto i = 0; i < 20; i++) 60 | { 61 | std::shuffle(indexes, indexes + N*10, g); 62 | 63 | mipp::Reg idx = mipp::oloadu(indexes); 64 | mipp::Reg r = mipp::gather(inputs, idx); 65 | 66 | for (auto i = 0; i < N; i++) 67 | REQUIRE(+r[i] == +inputs[indexes[i]]); 68 | } 69 | } 70 | 71 | TEST_CASE("Gather - mipp::Reg", "[mipp::gather]") 72 | { 73 | #if defined(MIPP_64BIT) 74 | SECTION("datatype = double") { test_Reg_gather(); } 75 | #endif 76 | SECTION("datatype = float") { test_Reg_gather(); } 77 | #if defined(MIPP_64BIT) 78 | SECTION("datatype = int64_t") { test_Reg_gather(); } 79 | #endif 80 | SECTION("datatype = int32_t") { test_Reg_gather(); } 81 | #if defined(MIPP_BW) 82 | SECTION("datatype = int16_t") { test_Reg_gather(); } 83 | //SECTION("datatype = int8_t") { test_Reg_gather(); } 84 | #endif 85 | } 86 | 87 | template 88 | void test_reg_maskzgat() 89 | { 90 | constexpr int N = mipp::N(); 91 | TD inputs [N*10]; 92 | TI indexes[N*10]; 93 | std::iota(inputs, inputs + N*10, (TD)0); 94 | std::iota(indexes, indexes + N*10, (TI)0); 95 | 96 | bool mask[N]; 97 | std::fill(mask, mask + N/2, true ); 98 | std::fill(mask + N/2, mask + N, false); 99 | 100 | std::mt19937 g; 101 | for (auto i = 0; i < 20; i++) 102 | { 103 | std::shuffle(indexes, indexes + N*10, g); 104 | std::shuffle(mask, mask + N, g); 105 | 106 | mipp::msk m = mipp::set(mask); 107 | mipp::reg idx = 
mipp::loadu(indexes); 108 | mipp::reg r = mipp::maskzgat(m, inputs, idx); 109 | 110 | for (auto i = 0; i < N; i++) 111 | { 112 | if (mask[i]) 113 | REQUIRE(+mipp::get(r, i) == +inputs[indexes[i]]); 114 | else 115 | REQUIRE(+mipp::get(r, i) == (TD)0); 116 | } 117 | } 118 | } 119 | 120 | #ifndef MIPP_NO 121 | TEST_CASE("Masked gather - mipp::reg", "[mipp::maskzgat]") 122 | { 123 | #if defined(MIPP_64BIT) 124 | SECTION("datatype = double") { test_reg_maskzgat(); } 125 | #endif 126 | SECTION("datatype = float") { test_reg_maskzgat(); } 127 | #if defined(MIPP_64BIT) 128 | SECTION("datatype = int64_t") { test_reg_maskzgat(); } 129 | #endif 130 | SECTION("datatype = int32_t") { test_reg_maskzgat(); } 131 | #if defined(MIPP_BW) 132 | SECTION("datatype = int16_t") { test_reg_maskzgat(); } 133 | //SECTION("datatype = int8_t") { test_reg_maskzgat(); } 134 | #endif 135 | } 136 | #endif 137 | 138 | template 139 | void test_Reg_maskzgat() 140 | { 141 | constexpr int N = mipp::N(); 142 | TD inputs [N*10]; 143 | TI indexes[N*10]; 144 | std::iota(inputs, inputs + N*10, (TD)0); 145 | std::iota(indexes, indexes + N*10, (TI)0); 146 | 147 | bool mask[N]; 148 | std::fill(mask, mask + N/2, true ); 149 | std::fill(mask + N/2, mask + N, false); 150 | 151 | std::mt19937 g; 152 | for (auto i = 0; i < 20; i++) 153 | { 154 | std::shuffle(indexes, indexes + N*10, g); 155 | std::shuffle(mask, mask + N, g); 156 | 157 | mipp::Msk m = mipp::oset(mask); 158 | mipp::Reg idx = mipp::oloadu(indexes); 159 | mipp::Reg r = mipp::maskzgat(m, inputs, idx); 160 | 161 | for (auto i = 0; i < N; i++) 162 | { 163 | if (mask[i]) 164 | REQUIRE(+r[i] == +inputs[indexes[i]]); 165 | else 166 | REQUIRE(+r[i] == (TD)0); 167 | } 168 | } 169 | } 170 | 171 | TEST_CASE("Masked gather - mipp::Reg", "[mipp::maskzgat]") 172 | { 173 | #if defined(MIPP_64BIT) 174 | SECTION("datatype = double") { test_Reg_maskzgat(); } 175 | #endif 176 | SECTION("datatype = float") { test_Reg_maskzgat(); } 177 | #if defined(MIPP_64BIT) 178 | 
SECTION("datatype = int64_t") { test_Reg_maskzgat(); } 179 | #endif 180 | SECTION("datatype = int32_t") { test_Reg_maskzgat(); } 181 | #if defined(MIPP_BW) 182 | SECTION("datatype = int16_t") { test_Reg_maskzgat(); } 183 | //SECTION("datatype = int8_t") { test_Reg_maskzgat(); } 184 | #endif 185 | } -------------------------------------------------------------------------------- /tests/src/memory_operations/get.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_get() 11 | { 12 | T inputs[mipp::N()]; 13 | std::iota(inputs, inputs + mipp::N(), (T)0); 14 | 15 | mipp::reg r = mipp::load(inputs); 16 | 17 | for (auto i = 0; i < mipp::N(); i++) 18 | REQUIRE(mipp::get(r, i) == (T)i); 19 | } 20 | 21 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 22 | TEST_CASE("Get - mipp::reg", "[mipp::get]") 23 | { 24 | #if defined(MIPP_64BIT) 25 | SECTION("datatype = double") { test_reg_get(); } 26 | #endif 27 | SECTION("datatype = float") { test_reg_get(); } 28 | 29 | #if defined(MIPP_64BIT) 30 | SECTION("datatype = int64_t") { test_reg_get(); } 31 | #endif 32 | SECTION("datatype = int32_t") { test_reg_get(); } 33 | #if defined(MIPP_BW) 34 | SECTION("datatype = int16_t") { test_reg_get(); } 35 | SECTION("datatype = int8_t") { test_reg_get(); } 36 | #endif 37 | } 38 | #endif 39 | 40 | template 41 | void test_Reg_get() 42 | { 43 | T inputs[mipp::N()]; 44 | std::iota(inputs, inputs + mipp::N(), (T)0); 45 | 46 | mipp::Reg r = mipp::oload(inputs); 47 | 48 | for (auto i = 0; i < mipp::N(); i++) 49 | REQUIRE(mipp::get(r, i) == (T)i); 50 | } 51 | 52 | #if !defined(MIPP_SVE_LS) 53 | TEST_CASE("Get - mipp::Reg", "[mipp::get]") 54 | { 55 | #if defined(MIPP_64BIT) 56 | SECTION("datatype = double") { test_Reg_get(); } 57 | #endif 58 | SECTION("datatype = float") { test_Reg_get(); } 59 | 60 | #if defined(MIPP_64BIT) 61 | SECTION("datatype = 
int64_t") { test_Reg_get(); } 62 | #endif 63 | SECTION("datatype = int32_t") { test_Reg_get(); } 64 | #if defined(MIPP_BW) 65 | SECTION("datatype = int16_t") { test_Reg_get(); } 66 | SECTION("datatype = int8_t") { test_Reg_get(); } 67 | #endif 68 | } 69 | #endif 70 | 71 | template 72 | void test_reg_2_get() 73 | { 74 | T inputs[mipp::N()]; 75 | std::iota(inputs, inputs + mipp::N(), (T)0); 76 | 77 | mipp::reg r = mipp::load(inputs); 78 | 79 | mipp::reg_2 rl = mipp::low(r); 80 | mipp::reg_2 rh = mipp::high(r); 81 | 82 | for (auto i = 0; i < mipp::N()/2; i++) 83 | REQUIRE(mipp::get(rl, i) == (T)i); 84 | 85 | for (auto i = 0; i < mipp::N()/2; i++) 86 | REQUIRE(mipp::get(rh, i) == (T)(i+mipp::N()/2)); 87 | } 88 | 89 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 90 | TEST_CASE("Get - mipp::reg_2", "[mipp::get]") 91 | { 92 | #if defined(MIPP_64BIT) 93 | SECTION("datatype = double") { test_reg_2_get(); } 94 | #endif 95 | SECTION("datatype = float") { test_reg_2_get(); } 96 | 97 | #if defined(MIPP_64BIT) 98 | SECTION("datatype = int64_t") { test_reg_2_get(); } 99 | #endif 100 | SECTION("datatype = int32_t") { test_reg_2_get(); } 101 | #if defined(MIPP_BW) 102 | SECTION("datatype = int16_t") { test_reg_2_get(); } 103 | SECTION("datatype = int8_t") { test_reg_2_get(); } 104 | #endif 105 | } 106 | #endif 107 | 108 | template 109 | void test_Reg_2_get() 110 | { 111 | T inputs[mipp::N()]; 112 | std::iota(inputs, inputs + mipp::N(), (T)0); 113 | 114 | mipp::Reg r = mipp::oload(inputs); 115 | 116 | mipp::Reg_2 rl = mipp::low(r); 117 | mipp::Reg_2 rh = mipp::high(r); 118 | 119 | for (auto i = 0; i < mipp::N()/2; i++) 120 | REQUIRE(mipp::get(rl, i) == (T)i); 121 | 122 | for (auto i = 0; i < mipp::N()/2; i++) 123 | REQUIRE(mipp::get(rh, i) == (T)(i+mipp::N()/2)); 124 | } 125 | 126 | #if !defined(MIPP_SVE_LS) 127 | TEST_CASE("Get - mipp::Reg_2", "[mipp::get]") 128 | { 129 | #if defined(MIPP_64BIT) 130 | SECTION("datatype = double") { test_Reg_2_get(); } 131 | #endif 132 | 
SECTION("datatype = float") { test_Reg_2_get(); } 133 | 134 | #if defined(MIPP_64BIT) 135 | SECTION("datatype = int64_t") { test_Reg_2_get(); } 136 | #endif 137 | SECTION("datatype = int32_t") { test_Reg_2_get(); } 138 | #if defined(MIPP_BW) 139 | SECTION("datatype = int16_t") { test_Reg_2_get(); } 140 | SECTION("datatype = int8_t") { test_Reg_2_get(); } 141 | #endif 142 | } 143 | #endif 144 | 145 | template 146 | void test_msk_get() 147 | { 148 | constexpr int N = mipp::N(); 149 | 150 | bool mask[N]; 151 | std::fill(mask, mask + N/2, true ); 152 | std::fill(mask + N/2, mask + N, false); 153 | 154 | std::mt19937 g; 155 | std::shuffle(mask, mask + N, g); 156 | 157 | mipp::msk m = mipp::set(mask); 158 | 159 | for (auto i = 0; i < mipp::N(); i++) 160 | REQUIRE(mipp::get(m, i) == mask[i]); 161 | } 162 | 163 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 164 | TEST_CASE("Get - mipp::msk", "[mipp::get]") 165 | { 166 | #if defined(MIPP_64BIT) 167 | SECTION("datatype = double") { test_msk_get(); } 168 | #endif 169 | SECTION("datatype = float") { test_msk_get(); } 170 | 171 | #if defined(MIPP_64BIT) 172 | SECTION("datatype = int64_t") { test_msk_get(); } 173 | #endif 174 | SECTION("datatype = int32_t") { test_msk_get(); } 175 | #if defined(MIPP_BW) 176 | SECTION("datatype = int16_t") { test_msk_get(); } 177 | SECTION("datatype = int8_t") { test_msk_get(); } 178 | #endif 179 | } 180 | #endif 181 | 182 | template 183 | void test_Msk_get() 184 | { 185 | constexpr int N = mipp::N(); 186 | 187 | bool mask[N]; 188 | std::fill(mask, mask + N/2, true ); 189 | std::fill(mask + N/2, mask + N, false); 190 | 191 | std::mt19937 g; 192 | std::shuffle(mask, mask + N, g); 193 | 194 | mipp::Msk m = mipp::oset(mask); 195 | 196 | for (auto i = 0; i < mipp::N(); i++) 197 | REQUIRE(mipp::get(m, i) == mask[i]); 198 | } 199 | 200 | #if !defined(MIPP_SVE_LS) 201 | TEST_CASE("Get - mipp::Msk", "[mipp::get]") 202 | { 203 | #if defined(MIPP_64BIT) 204 | SECTION("datatype = double") { 
test_Msk_get(); } 205 | #endif 206 | SECTION("datatype = float") { test_Msk_get(); } 207 | 208 | #if defined(MIPP_64BIT) 209 | SECTION("datatype = int64_t") { test_Msk_get(); } 210 | #endif 211 | SECTION("datatype = int32_t") { test_Msk_get(); } 212 | #if defined(MIPP_BW) 213 | SECTION("datatype = int16_t") { test_Msk_get(); } 214 | SECTION("datatype = int8_t") { test_Msk_get(); } 215 | #endif 216 | } 217 | #endif 218 | -------------------------------------------------------------------------------- /tests/src/memory_operations/high.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_high() 11 | { 12 | T inputs[mipp::N()]; 13 | std::iota(inputs, inputs + mipp::N(), (T)0); 14 | 15 | mipp::reg r = mipp::load(inputs); 16 | mipp::reg_2 r_2 = mipp::high (r); 17 | 18 | for (auto i = 0; i < mipp::N()/2; i++) 19 | REQUIRE(mipp::get(r_2, i) == inputs[mipp::N()/2 +i]); 20 | } 21 | 22 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 23 | TEST_CASE("High - mipp::reg", "[mipp::high]") 24 | { 25 | #if defined(MIPP_64BIT) 26 | SECTION("datatype = double") { test_reg_high(); } 27 | #endif 28 | SECTION("datatype = float") { test_reg_high(); } 29 | 30 | #if defined(MIPP_64BIT) 31 | SECTION("datatype = int64_t") { test_reg_high(); } 32 | #endif 33 | SECTION("datatype = int32_t") { test_reg_high(); } 34 | #if defined(MIPP_BW) 35 | SECTION("datatype = int16_t") { test_reg_high(); } 36 | SECTION("datatype = int8_t") { test_reg_high(); } 37 | #endif 38 | } 39 | #endif 40 | 41 | template 42 | void test_Reg_high() 43 | { 44 | T inputs[mipp::N()]; 45 | std::iota(inputs, inputs + mipp::N(), (T)0); 46 | 47 | mipp::Reg r = inputs; 48 | mipp::Reg_2 r_2 = r.high(); 49 | 50 | for (auto i = 0; i < mipp::N()/2; i++) 51 | REQUIRE(r_2[i] == inputs[mipp::N()/2 +i]); 52 | } 53 | 54 | #if !defined(MIPP_SVE_LS) 55 | TEST_CASE("High - mipp::Reg", 
"[mipp::high]") 56 | { 57 | #if defined(MIPP_64BIT) 58 | SECTION("datatype = double") { test_Reg_high(); } 59 | #endif 60 | SECTION("datatype = float") { test_Reg_high(); } 61 | 62 | #if defined(MIPP_64BIT) 63 | SECTION("datatype = int64_t") { test_Reg_high(); } 64 | #endif 65 | SECTION("datatype = int32_t") { test_Reg_high(); } 66 | #if defined(MIPP_BW) 67 | SECTION("datatype = int16_t") { test_Reg_high(); } 68 | SECTION("datatype = int8_t") { test_Reg_high(); } 69 | #endif 70 | } 71 | #endif 72 | -------------------------------------------------------------------------------- /tests/src/memory_operations/low.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_low() 11 | { 12 | T inputs[mipp::N()]; 13 | std::iota(inputs, inputs + mipp::N(), (T)0); 14 | 15 | mipp::reg r = mipp::load(inputs); 16 | mipp::reg_2 r_2 = mipp::low (r); 17 | 18 | for (auto i = 0; i < mipp::N()/2; i++) 19 | REQUIRE(mipp::get(r_2, i) == inputs[i]); 20 | } 21 | 22 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 23 | TEST_CASE("Low - mipp::reg", "[mipp::low]") 24 | { 25 | #if defined(MIPP_64BIT) 26 | SECTION("datatype = double") { test_reg_low(); } 27 | #endif 28 | SECTION("datatype = float") { test_reg_low(); } 29 | 30 | #if defined(MIPP_64BIT) 31 | SECTION("datatype = int64_t") { test_reg_low(); } 32 | #endif 33 | SECTION("datatype = int32_t") { test_reg_low(); } 34 | #if defined(MIPP_BW) 35 | SECTION("datatype = int16_t") { test_reg_low(); } 36 | SECTION("datatype = int8_t") { test_reg_low(); } 37 | #endif 38 | } 39 | #endif 40 | 41 | template 42 | void test_Reg_low() 43 | { 44 | T inputs[mipp::N()]; 45 | std::iota(inputs, inputs + mipp::N(), (T)0); 46 | 47 | mipp::Reg r = inputs; 48 | mipp::Reg_2 r_2 = r.low(); 49 | 50 | for (auto i = 0; i < mipp::N()/2; i++) 51 | REQUIRE(r_2[i] == inputs[i]); 52 | } 53 | 54 | #if 
!defined(MIPP_SVE_LS) 55 | TEST_CASE("Low - mipp::Reg", "[mipp::low]") 56 | { 57 | #if defined(MIPP_64BIT) 58 | SECTION("datatype = double") { test_Reg_low(); } 59 | #endif 60 | SECTION("datatype = float") { test_Reg_low(); } 61 | 62 | #if defined(MIPP_64BIT) 63 | SECTION("datatype = int64_t") { test_Reg_low(); } 64 | #endif 65 | SECTION("datatype = int32_t") { test_Reg_low(); } 66 | #if defined(MIPP_BW) 67 | SECTION("datatype = int16_t") { test_Reg_low(); } 68 | SECTION("datatype = int8_t") { test_Reg_low(); } 69 | #endif 70 | } 71 | #endif 72 | -------------------------------------------------------------------------------- /tests/src/memory_operations/lrot.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_lrot() 11 | { 12 | constexpr int N = mipp::N(); 13 | T inputs1[N]; 14 | std::iota(inputs1, inputs1 + N, (T)0); 15 | 16 | mipp::reg r1 = mipp::load(inputs1); 17 | mipp::reg r2 = mipp::lrot(r1); 18 | 19 | for (auto i = 0; i < N; i++) 20 | REQUIRE(mipp::get(r2, i) == (i == N-1 ? 
inputs1[0] : inputs1[i+1])); 21 | } 22 | 23 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 24 | TEST_CASE("Left rotation (cyclic permutation) - mipp::reg", "[mipp::lrot]") 25 | { 26 | #if defined(MIPP_64BIT) 27 | SECTION("datatype = double") { test_reg_lrot(); } 28 | #endif 29 | SECTION("datatype = float") { test_reg_lrot(); } 30 | 31 | #if defined(MIPP_64BIT) 32 | SECTION("datatype = int64_t") { test_reg_lrot(); } 33 | #endif 34 | SECTION("datatype = int32_t") { test_reg_lrot(); } 35 | #if defined(MIPP_BW) 36 | #if (!defined(MIPP_SSE) && !defined(MIPP_AVX512)) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) || (defined(MIPP_AVX512) && defined(MIPP_AVX512BW)) 37 | SECTION("datatype = int16_t") { test_reg_lrot(); } 38 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI)) 39 | SECTION("datatype = int8_t") { test_reg_lrot(); } 40 | #endif 41 | #endif 42 | #endif 43 | } 44 | #endif 45 | 46 | template 47 | void test_Reg_lrot() 48 | { 49 | constexpr int N = mipp::N(); 50 | T inputs1[N]; 51 | std::iota(inputs1, inputs1 + N, (T)0); 52 | 53 | mipp::Reg r1 = inputs1; 54 | mipp::Reg r2 = mipp::lrot(r1); 55 | 56 | for (auto i = 0; i < N; i++) 57 | REQUIRE(r2[i] == (i == N-1 ? 
inputs1[0] : inputs1[i+1])); 58 | } 59 | 60 | #if !defined(MIPP_SVE_LS) 61 | TEST_CASE("Left rotation (cyclic permutation) - mipp::Reg", "[mipp::lrot]") 62 | { 63 | #if defined(MIPP_64BIT) 64 | SECTION("datatype = double") { test_Reg_lrot(); } 65 | #endif 66 | SECTION("datatype = float") { test_Reg_lrot(); } 67 | 68 | #if defined(MIPP_64BIT) 69 | SECTION("datatype = int64_t") { test_Reg_lrot(); } 70 | #endif 71 | SECTION("datatype = int32_t") { test_Reg_lrot(); } 72 | #if defined(MIPP_BW) 73 | #if (!defined(MIPP_SSE) && !defined(MIPP_AVX512)) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) || (defined(MIPP_AVX512) && defined(MIPP_AVX512BW)) 74 | SECTION("datatype = int16_t") { test_Reg_lrot(); } 75 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI)) 76 | SECTION("datatype = int8_t") { test_Reg_lrot(); } 77 | #endif 78 | #endif 79 | #endif 80 | } 81 | #endif 82 | -------------------------------------------------------------------------------- /tests/src/memory_operations/rrot.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_rrot() 11 | { 12 | constexpr int N = mipp::N(); 13 | T inputs1[N]; 14 | std::iota(inputs1, inputs1 + N, (T)0); 15 | 16 | mipp::reg r1 = mipp::load(inputs1); 17 | mipp::reg r2 = mipp::rrot(r1); 18 | 19 | for (auto i = 0; i < N; i++) 20 | REQUIRE(mipp::get(r2, i) == (i == 0 ? 
inputs1[N-1] : inputs1[i-1])); 21 | } 22 | 23 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 24 | TEST_CASE("Right rotation (cyclic permutation) - mipp::reg", "[mipp::rrot]") 25 | { 26 | #if defined(MIPP_64BIT) 27 | SECTION("datatype = double") { test_reg_rrot(); } 28 | #endif 29 | SECTION("datatype = float") { test_reg_rrot(); } 30 | 31 | #if defined(MIPP_64BIT) 32 | SECTION("datatype = int64_t") { test_reg_rrot(); } 33 | #endif 34 | SECTION("datatype = int32_t") { test_reg_rrot(); } 35 | #if defined(MIPP_BW) 36 | #if (!defined(MIPP_SSE) && !defined(MIPP_AVX512)) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) || (defined(MIPP_AVX512) && defined(MIPP_AVX512BW)) 37 | SECTION("datatype = int16_t") { test_reg_rrot(); } 38 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI)) 39 | SECTION("datatype = int8_t") { test_reg_rrot(); } 40 | #endif 41 | #endif 42 | #endif 43 | } 44 | #endif 45 | 46 | template 47 | void test_Reg_rrot() 48 | { 49 | constexpr int N = mipp::N(); 50 | T inputs1[N]; 51 | std::iota(inputs1, inputs1 + N, (T)0); 52 | 53 | mipp::Reg r1 = inputs1; 54 | mipp::Reg r2 = mipp::rrot(r1); 55 | 56 | for (auto i = 0; i < N; i++) 57 | REQUIRE(r2[i] == (i == 0 ? 
inputs1[N-1] : inputs1[i-1])); 58 | } 59 | 60 | #if !defined(MIPP_SVE_LS) 61 | TEST_CASE("Right rotation (cyclic permutation) - mipp::Reg", "[mipp::rrot]") 62 | { 63 | #if defined(MIPP_64BIT) 64 | SECTION("datatype = double") { test_Reg_rrot(); } 65 | #endif 66 | SECTION("datatype = float") { test_Reg_rrot(); } 67 | 68 | #if defined(MIPP_64BIT) 69 | SECTION("datatype = int64_t") { test_Reg_rrot(); } 70 | #endif 71 | SECTION("datatype = int32_t") { test_Reg_rrot(); } 72 | #if defined(MIPP_BW) 73 | #if (!defined(MIPP_SSE) && !defined(MIPP_AVX512)) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) || (defined(MIPP_AVX512) && defined(MIPP_AVX512BW)) 74 | SECTION("datatype = int16_t") { test_Reg_rrot(); } 75 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI)) 76 | SECTION("datatype = int8_t") { test_Reg_rrot(); } 77 | #endif 78 | #endif 79 | #endif 80 | } 81 | #endif 82 | -------------------------------------------------------------------------------- /tests/src/reductions/hadd.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_hadd_int() 11 | { 12 | constexpr int N = mipp::N(); 13 | T inputs1[N]; 14 | std::mt19937 g; 15 | std::uniform_int_distribution dis((T)-10, (T)10); 16 | 17 | for (auto j = 0; j < 100; j++) 18 | { 19 | for (auto i = 0; i < N; i++) 20 | inputs1[i] = dis(g); 21 | 22 | mipp::reg r1 = mipp::loadu(inputs1); 23 | auto val = mipp::hadd(r1); 24 | 25 | int64_t acc = inputs1[0]; 26 | for (auto i = 1; i < N; i++) 27 | acc += (int64_t)inputs1[i]; 28 | 29 | T res = (T)std::max(std::min(acc, (int64_t)std::numeric_limits::max()), 30 | (int64_t)std::numeric_limits::min()); 31 | 32 | REQUIRE(res == val); 33 | } 34 | } 35 | 36 | template 37 | void test_reg_hadd_real() 38 | { 39 | constexpr int N = mipp::N(); 40 | T inputs1[N]; 41 | std::mt19937 g; 42 | 
std::uniform_real_distribution dis(-100, 100); 43 | 44 | for (auto j = 0; j < 100; j++) 45 | { 46 | for (auto i = 0; i < N; i++) 47 | inputs1[i] = dis(g); 48 | 49 | mipp::reg r1 = mipp::loadu(inputs1); 50 | auto val = mipp::hadd(r1); 51 | 52 | auto res = inputs1[0]; 53 | for (auto i = 1; i < N; i++) 54 | res += inputs1[i]; 55 | 56 | // REQUIRE(res == Approx(val).epsilon(0.001)); 57 | REQUIRE(res == Approx(val)); 58 | } 59 | } 60 | 61 | #ifndef MIPP_NO 62 | TEST_CASE("Horizontal addition - mipp::reg", "[mipp::hadd]") 63 | { 64 | #if defined(MIPP_64BIT) 65 | SECTION("datatype = double") { test_reg_hadd_real(); } 66 | #endif 67 | SECTION("datatype = float") { test_reg_hadd_real(); } 68 | 69 | #if !defined(MIPP_SVE_LS) 70 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 71 | #if defined(MIPP_64BIT) 72 | SECTION("datatype = int64_t") { test_reg_hadd_int(); } 73 | #endif 74 | SECTION("datatype = int32_t") { test_reg_hadd_int(); } 75 | #endif 76 | #if defined(MIPP_BW) 77 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) 78 | SECTION("datatype = int16_t") { test_reg_hadd_int(); } 79 | #ifndef _MSC_VER 80 | SECTION("datatype = int8_t") { test_reg_hadd_int(); } 81 | #endif 82 | #endif 83 | #endif 84 | #endif 85 | } 86 | #endif 87 | 88 | template 89 | void test_Reg_hadd_int() 90 | { 91 | constexpr int N = mipp::N(); 92 | T inputs1[N]; 93 | std::mt19937 g; 94 | std::uniform_int_distribution dis((T)-10, (T)10); 95 | 96 | for (auto j = 0; j < 100; j++) 97 | { 98 | for (auto i = 0; i < N; i++) 99 | inputs1[i] = dis(g); 100 | 101 | mipp::Reg r1 = inputs1; 102 | auto val = mipp::hadd(r1); 103 | 104 | int64_t acc = inputs1[0]; 105 | for (auto i = 1; i < N; i++) 106 | acc += (int64_t)inputs1[i]; 107 | 108 | T res = (T)std::max(std::min(acc, (int64_t)std::numeric_limits::max()), 109 | (int64_t)std::numeric_limits::min()); 110 | 111 | REQUIRE(res == val); 112 | } 113 | } 114 | 115 | template 116 | void test_Reg_hadd_real() 117 | { 118 
| constexpr int N = mipp::N(); 119 | T inputs1[N]; 120 | std::mt19937 g; 121 | std::uniform_real_distribution dis(-100, 100); 122 | 123 | for (auto j = 0; j < 100; j++) 124 | { 125 | for (auto i = 0; i < N; i++) 126 | inputs1[i] = dis(g); 127 | 128 | mipp::Reg r1 = inputs1; 129 | auto val = mipp::hadd(r1); 130 | 131 | auto res = inputs1[0]; 132 | for (auto i = 1; i < N; i++) 133 | res += inputs1[i]; 134 | 135 | // REQUIRE(res == Approx(val).epsilon(0.001)); 136 | REQUIRE(res == Approx(val)); 137 | } 138 | } 139 | 140 | TEST_CASE("Horizontal addition - mipp::Reg", "[mipp::hadd]") 141 | { 142 | #if defined(MIPP_64BIT) 143 | SECTION("datatype = double") { test_Reg_hadd_real(); } 144 | #endif 145 | SECTION("datatype = float") { test_Reg_hadd_real(); } 146 | 147 | #if !defined(MIPP_SVE_LS) 148 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 149 | #if defined(MIPP_64BIT) 150 | SECTION("datatype = int64_t") { test_Reg_hadd_int(); } 151 | #endif 152 | SECTION("datatype = int32_t") { test_Reg_hadd_int(); } 153 | #endif 154 | #if defined(MIPP_BW) 155 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) 156 | SECTION("datatype = int16_t") { test_Reg_hadd_int(); } 157 | #ifndef _MSC_VER 158 | SECTION("datatype = int8_t") { test_Reg_hadd_int(); } 159 | #endif 160 | #endif 161 | #endif 162 | #endif 163 | } -------------------------------------------------------------------------------- /tests/src/reductions/hmax.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_hmax_int() 11 | { 12 | constexpr int N = mipp::N(); 13 | T inputs1[N]; 14 | std::mt19937 g; 15 | std::uniform_int_distribution dis(std::numeric_limits::min(), std::numeric_limits::max()); 16 | 17 | for (auto j = 0; j < 100; j++) 18 | { 19 | for (auto i = 0; i < N; i++) 20 | inputs1[i] = dis(g); 21 | 22 | 
mipp::reg r1 = mipp::loadu(inputs1); 23 | auto val = mipp::hmax(r1); 24 | 25 | auto res = inputs1[0]; 26 | for (auto i = 1; i < N; i++) 27 | res = std::max(res, inputs1[i]); 28 | 29 | REQUIRE(res == val); 30 | } 31 | } 32 | 33 | template 34 | void test_reg_hmax_real() 35 | { 36 | constexpr int N = mipp::N(); 37 | T inputs1[N]; 38 | std::mt19937 g; 39 | std::uniform_real_distribution dis(std::numeric_limits::min(), std::numeric_limits::max()); 40 | 41 | for (auto j = 0; j < 100; j++) 42 | { 43 | for (auto i = 0; i < N; i++) 44 | inputs1[i] = dis(g); 45 | 46 | mipp::reg r1 = mipp::loadu(inputs1); 47 | auto val = mipp::hmax(r1); 48 | 49 | auto res = inputs1[0]; 50 | for (auto i = 1; i < N; i++) 51 | res = std::max(res, inputs1[i]); 52 | 53 | REQUIRE(res == val); 54 | } 55 | } 56 | 57 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 58 | TEST_CASE("Horizontal maximum - mipp::reg", "[mipp::hmax]") 59 | { 60 | #if defined(MIPP_64BIT) 61 | SECTION("datatype = double") { test_reg_hmax_real(); } 62 | #endif 63 | SECTION("datatype = float") { test_reg_hmax_real(); } 64 | 65 | #if defined(MIPP_64BIT) 66 | #if !defined(MIPP_SSE) && !defined(MIPP_AVX) && !defined(MIPP_NEON) 67 | SECTION("datatype = int64_t") { test_reg_hmax_int(); } 68 | #endif 69 | #endif 70 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 71 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 72 | SECTION("datatype = int32_t") { test_reg_hmax_int(); } 73 | #endif 74 | #endif 75 | #if defined(MIPP_BW) 76 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) 77 | SECTION("datatype = int16_t") { test_reg_hmax_int(); } 78 | #endif 79 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 80 | #ifndef _MSC_VER 81 | SECTION("datatype = int8_t") { test_reg_hmax_int(); } 82 | #endif 83 | #endif 84 | #endif 85 | } 86 | #endif 87 | 88 | template 89 | void test_Reg_hmax_int() 90 | { 91 | constexpr int N = mipp::N(); 92 | T 
inputs1[N]; 93 | std::mt19937 g; 94 | std::uniform_int_distribution dis(std::numeric_limits::min(), std::numeric_limits::max()); 95 | 96 | for (auto j = 0; j < 100; j++) 97 | { 98 | for (auto i = 0; i < N; i++) 99 | inputs1[i] = dis(g); 100 | 101 | mipp::Reg r1 = inputs1; 102 | auto val = mipp::hmax(r1); 103 | 104 | auto res = inputs1[0]; 105 | for (auto i = 1; i < N; i++) 106 | res = std::max(res, inputs1[i]); 107 | 108 | REQUIRE(res == val); 109 | } 110 | } 111 | 112 | template 113 | void test_Reg_hmax_real() 114 | { 115 | constexpr int N = mipp::N(); 116 | T inputs1[N]; 117 | std::mt19937 g; 118 | std::uniform_real_distribution dis(std::numeric_limits::min(), std::numeric_limits::max()); 119 | 120 | for (auto j = 0; j < 100; j++) 121 | { 122 | for (auto i = 0; i < N; i++) 123 | inputs1[i] = dis(g); 124 | 125 | mipp::Reg r1 = inputs1; 126 | auto val = mipp::hmax(r1); 127 | 128 | auto res = inputs1[0]; 129 | for (auto i = 1; i < N; i++) 130 | res = std::max(res, inputs1[i]); 131 | 132 | REQUIRE(res == val); 133 | } 134 | } 135 | 136 | #if !defined(MIPP_SVE_LS) 137 | TEST_CASE("Horizontal maximum - mipp::Reg", "[mipp::hmax]") 138 | { 139 | #if defined(MIPP_64BIT) 140 | SECTION("datatype = double") { test_Reg_hmax_real(); } 141 | #endif 142 | SECTION("datatype = float") { test_Reg_hmax_real(); } 143 | 144 | #if defined(MIPP_64BIT) 145 | #if !defined(MIPP_SSE) && !defined(MIPP_AVX) && !defined(MIPP_NEON) 146 | SECTION("datatype = int64_t") { test_Reg_hmax_int(); } 147 | #endif 148 | #endif 149 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 150 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 151 | SECTION("datatype = int32_t") { test_Reg_hmax_int(); } 152 | #endif 153 | #endif 154 | #if defined(MIPP_BW) 155 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) 156 | SECTION("datatype = int16_t") { test_Reg_hmax_int(); } 157 | #endif 158 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && 
MIPP_INSTR_VERSION >= 41) 159 | #ifndef _MSC_VER 160 | SECTION("datatype = int8_t") { test_Reg_hmax_int(); } 161 | #endif 162 | #endif 163 | #endif 164 | } 165 | #endif 166 | -------------------------------------------------------------------------------- /tests/src/reductions/hmin.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_hmin_int() 11 | { 12 | constexpr int N = mipp::N(); 13 | T inputs1[N]; 14 | std::mt19937 g; 15 | std::uniform_int_distribution dis(std::numeric_limits::min(), std::numeric_limits::max()); 16 | 17 | for (auto j = 0; j < 100; j++) 18 | { 19 | for (auto i = 0; i < N; i++) 20 | inputs1[i] = dis(g); 21 | 22 | mipp::reg r1 = mipp::loadu(inputs1); 23 | auto val = mipp::hmin(r1); 24 | 25 | auto res = inputs1[0]; 26 | for (auto i = 1; i < N; i++) 27 | res = std::min(res, inputs1[i]); 28 | 29 | REQUIRE(res == val); 30 | } 31 | } 32 | 33 | template 34 | void test_reg_hmin_real() 35 | { 36 | constexpr int N = mipp::N(); 37 | T inputs1[N]; 38 | std::mt19937 g; 39 | 40 | std::uniform_real_distribution dis(std::numeric_limits::min(), std::numeric_limits::max()); 41 | 42 | for (auto j = 0; j < 100; j++) 43 | { 44 | for (auto i = 0; i < N; i++) 45 | inputs1[i] = dis(g); 46 | 47 | mipp::reg r1 = mipp::loadu(inputs1); 48 | auto val = mipp::hmin(r1); 49 | 50 | auto res = inputs1[0]; 51 | for (auto i = 1; i < N; i++) 52 | res = std::min(res, inputs1[i]); 53 | 54 | REQUIRE(res == val); 55 | } 56 | } 57 | 58 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 59 | TEST_CASE("Horizontal minimum - mipp::reg", "[mipp::hmin]") 60 | { 61 | #if defined(MIPP_64BIT) 62 | SECTION("datatype = double") { test_reg_hmin_real(); } 63 | #endif 64 | SECTION("datatype = float") { test_reg_hmin_real(); } 65 | 66 | #if defined(MIPP_64BIT) 67 | #if !defined(MIPP_SSE) && !defined(MIPP_AVX) && !defined(MIPP_NEON) 68 | 
SECTION("datatype = int64_t") { test_reg_hmin_int(); } 69 | #endif 70 | #endif 71 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 72 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 73 | SECTION("datatype = int32_t") { test_reg_hmin_int(); } 74 | #endif 75 | #endif 76 | #if defined(MIPP_BW) 77 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) 78 | SECTION("datatype = int16_t") { test_reg_hmin_int(); } 79 | #endif 80 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 81 | #ifndef _MSC_VER 82 | SECTION("datatype = int8_t") { test_reg_hmin_int(); } 83 | #endif 84 | #endif 85 | #endif 86 | } 87 | #endif 88 | 89 | template 90 | void test_Reg_hmin_int() 91 | { 92 | constexpr int N = mipp::N(); 93 | T inputs1[N]; 94 | std::mt19937 g; 95 | std::uniform_int_distribution dis(std::numeric_limits::min(), std::numeric_limits::max()); 96 | 97 | for (auto j = 0; j < 100; j++) 98 | { 99 | for (auto i = 0; i < N; i++) 100 | inputs1[i] = dis(g); 101 | 102 | mipp::Reg r1 = inputs1; 103 | auto val = mipp::hmin(r1); 104 | 105 | auto res = inputs1[0]; 106 | for (auto i = 1; i < N; i++) 107 | res = std::min(res, inputs1[i]); 108 | 109 | REQUIRE(res == val); 110 | } 111 | } 112 | 113 | template 114 | void test_Reg_hmin_real() 115 | { 116 | constexpr int N = mipp::N(); 117 | T inputs1[N]; 118 | std::mt19937 g; 119 | std::uniform_real_distribution dis(std::numeric_limits::min(), std::numeric_limits::max()); 120 | 121 | for (auto j = 0; j < 100; j++) /* trial counter is j so the inner element loops over i do not shadow it */ 122 | { 123 | for (auto i = 0; i < N; i++) 124 | inputs1[i] = dis(g); 125 | 126 | mipp::Reg r1 = inputs1; 127 | auto val = mipp::hmin(r1); 128 | 129 | auto res = inputs1[0]; 130 | for (auto i = 1; i < N; i++) 131 | res = std::min(res, inputs1[i]); 132 | 133 | REQUIRE(res == val); 134 | } 135 | } 136 | 137 | #if !defined(MIPP_SVE_LS) 138 | TEST_CASE("Horizontal minimum - mipp::Reg", "[mipp::hmin]") 139 | { 140 | #if defined(MIPP_64BIT) 141 | 
SECTION("datatype = double") { test_Reg_hmin_real(); } 142 | #endif 143 | SECTION("datatype = float") { test_Reg_hmin_real(); } 144 | 145 | #if defined(MIPP_64BIT) 146 | #if !defined(MIPP_SSE) && !defined(MIPP_AVX) && !defined(MIPP_NEON) 147 | SECTION("datatype = int64_t") { test_Reg_hmin_int(); } 148 | #endif 149 | #endif 150 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) 151 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 152 | SECTION("datatype = int32_t") { test_Reg_hmin_int(); } 153 | #endif 154 | #endif 155 | #if defined(MIPP_BW) 156 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) 157 | SECTION("datatype = int16_t") { test_Reg_hmin_int(); } 158 | #endif 159 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) 160 | #ifndef _MSC_VER 161 | SECTION("datatype = int8_t") { test_Reg_hmin_int(); } 162 | #endif 163 | #endif 164 | #endif 165 | } 166 | #endif 167 | -------------------------------------------------------------------------------- /tests/src/reductions/hmul.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | template 10 | void test_reg_hmul_int() 11 | { 12 | constexpr int N = mipp::N(); 13 | T inputs1[N]; 14 | std::mt19937 g; 15 | std::uniform_int_distribution dis((T)-10, (T)10); 16 | 17 | for (auto j = 0; j < 100; j++) 18 | { 19 | for (auto i = 0; i < N; i++) 20 | inputs1[i] = dis(g); 21 | 22 | mipp::reg r1 = mipp::loadu(inputs1); 23 | auto val = mipp::hmul(r1); 24 | 25 | auto res = inputs1[0]; 26 | for (auto i = 1; i < N; i++) 27 | res *= inputs1[i]; 28 | 29 | REQUIRE(res == val); 30 | } 31 | } 32 | 33 | template 34 | void test_reg_hmul_real() 35 | { 36 | constexpr int N = mipp::N(); 37 | T inputs1[N]; 38 | std::mt19937 g; 39 | std::uniform_real_distribution dis(-2, 2); 40 | 41 | for (auto j = 0; j < 100; j++) 42 | { 
43 | for (auto i = 0; i < N; i++) 44 | inputs1[i] = dis(g); 45 | 46 | mipp::reg r1 = mipp::loadu(inputs1); 47 | auto val = mipp::hmul(r1); 48 | 49 | auto res = inputs1[0]; 50 | for (auto i = 1; i < N; i++) 51 | res *= inputs1[i]; 52 | 53 | // REQUIRE(res == Approx(val).epsilon(0.001)); 54 | REQUIRE(res == Approx(val)); 55 | } 56 | } 57 | 58 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 59 | TEST_CASE("Horizontal multiplication - mipp::reg", "[mipp::hmul]") 60 | { 61 | #if defined(MIPP_64BIT) 62 | SECTION("datatype = double") { test_reg_hmul_real(); } 63 | #endif 64 | SECTION("datatype = float") { test_reg_hmul_real(); } 65 | } 66 | #endif 67 | 68 | template 69 | void test_Reg_hmul_int() 70 | { 71 | constexpr int N = mipp::N(); 72 | T inputs1[N]; 73 | std::mt19937 g; 74 | std::uniform_int_distribution dis((T)-10, (T)10); 75 | 76 | for (auto j = 0; j < 100; j++) 77 | { 78 | for (auto i = 0; i < N; i++) 79 | inputs1[i] = dis(g); 80 | 81 | mipp::Reg r1 = inputs1; 82 | auto val = mipp::hmul(r1); 83 | 84 | auto res = inputs1[0]; 85 | for (auto i = 1; i < N; i++) 86 | res *= inputs1[i]; 87 | 88 | REQUIRE(res == val); 89 | } 90 | } 91 | 92 | template 93 | void test_Reg_hmul_real() 94 | { 95 | constexpr int N = mipp::N(); 96 | T inputs1[N]; 97 | std::mt19937 g; 98 | std::uniform_real_distribution dis(-2, 2); 99 | 100 | for (auto j = 0; j < 100; j++) 101 | { 102 | for (auto i = 0; i < N; i++) 103 | inputs1[i] = dis(g); 104 | 105 | mipp::Reg r1 = inputs1; 106 | auto val = mipp::hmul(r1); 107 | 108 | auto res = inputs1[0]; 109 | for (auto i = 1; i < N; i++) 110 | res *= inputs1[i]; 111 | 112 | // REQUIRE(res == Approx(val).epsilon(0.001)); 113 | REQUIRE(res == Approx(val)); 114 | } 115 | } 116 | 117 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) 118 | TEST_CASE("Horizontal multiplication - mipp::Reg", "[mipp::hmul]") 119 | { 120 | #if defined(MIPP_64BIT) 121 | SECTION("datatype = double") { test_Reg_hmul_real(); } 122 | #endif 123 | SECTION("datatype = float") { 
test_Reg_hmul_real(); } 124 | } 125 | #endif 126 | -------------------------------------------------------------------------------- /tests/src/static_for.hpp: -------------------------------------------------------------------------------- 1 | // from https://www.codeproject.com/Articles/857354/Compile-Time-Loops-with-Cplusplus-Creating-a-Gener 2 | 3 | #ifndef STATIC_FOR_HPP_ 4 | #define STATIC_FOR_HPP_ 5 | 6 | #include 7 | 8 | #include "static_for.hxx" 9 | 10 | template 12 | inline void static_for(functor_types&&... functor_args) 13 | { 14 | static_for_impl<0, count-1, functor, sequence_width, functor_types...>:: 15 | loop(std::forward(functor_args)...); 16 | } 17 | 18 | template 20 | inline void static_for(functor_types&&... functor_args) 21 | { 22 | static_for_impl:: 23 | loop(std::forward(functor_args)...); 24 | } 25 | 26 | #endif // STATIC_FOR_HPP_ -------------------------------------------------------------------------------- /tests/src/static_for.hxx: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "static_for.hpp" 5 | 6 | template 8 | struct static_for_impl 9 | { 10 | static inline void loop(functor_types&&... functor_args) 11 | { 12 | // The main sequence point is created, and then we call "next" on each point inside 13 | using sequence = point; 14 | next 15 | (std::integral_constant(), 16 | std::forward(functor_args)...); 17 | } 18 | 19 | private: 20 | 21 | // A point is a node of an n-ary tree 22 | template struct point 23 | { 24 | static constexpr size_t start_ { pt_start }; 25 | static constexpr size_t end_ { pt_end }; 26 | static constexpr size_t count_ { end_ - start_ + 1 }; 27 | static constexpr bool is_end_point_ { count_ <= sequence_width }; 28 | 29 | static constexpr size_t sequence_count() 30 | { 31 | return 32 | points_in_sequence(sequence_width) > sequence_width 33 | ? 
34 | sequence_width 35 | : 36 | points_in_sequence(sequence_width); 37 | } 38 | 39 | private: 40 | // Calculates the start and end indexes for a child node 41 | static constexpr size_t child_start(size_t index) 42 | { 43 | return 44 | index == 0 45 | ? 46 | pt_start 47 | : 48 | child_end(index - 1) + 1; 49 | } 50 | static constexpr size_t child_end(size_t index) 51 | { 52 | return 53 | index == sequence_count() - 1 54 | ? 55 | pt_end 56 | : 57 | pt_start + points_in_sequence(sequence_count()) * (index + 1) - 58 | (index < count_ 59 | ? 60 | 1 61 | : 62 | 0); 63 | } 64 | static constexpr size_t points_in_sequence(size_t max) 65 | { 66 | return count_ / max + ( 67 | (count_ % max) > 0 68 | ? 69 | 1 70 | : 71 | 0); 72 | } 73 | 74 | public: 75 | // Generates child nodes when needed 76 | template using child_point = point; 77 | }; 78 | 79 | // flat_for is used to instantiate a section of our main static_for::loop 80 | // A point is used to specify which numbers this instance of flat_for will use 81 | template struct flat_for 82 | { 83 | // This is the entry point for flat_for 84 | static inline void flat_loop(functor_types&&... functor_args) 85 | { 86 | flat_next(std::integral_constant(), 87 | std::forward(functor_args)...); 88 | } 89 | 90 | private: 91 | // Loop termination 92 | static inline void flat_next 93 | (std::integral_constant, functor_types&&...) 94 | { 95 | } 96 | 97 | // Loop function that calls the function passed to it, as well as recurses 98 | template 99 | static inline void flat_next 100 | (std::integral_constant, functor_types&&... 
functor_args) 101 | { 102 | flat_functor::template func(std::forward(functor_args)...); 103 | flat_next(std::integral_constant(), 104 | std::forward(functor_args)...); 105 | } 106 | }; 107 | 108 | // This is what gets called when we run flat_for on a point 109 | // It will recurse into finer-grained points until the points are no bigger than sequence_width 110 | template struct flat_sequence 111 | { 112 | template static inline void func(functor_types&&... functor_args) 113 | { 114 | using pt = typename sequence::template child_point; 115 | next 116 | (std::integral_constant(), 117 | std::forward(functor_args)...); 118 | } 119 | }; 120 | 121 | // The true_type function is called when our sequence is small enough to run out 122 | // and call the main functor that was provided to us 123 | template static inline void next 124 | (std::true_type, functor_types&&... functor_args) 125 | { 126 | flat_for:: 127 | flat_loop(std::forward(functor_args)...); 128 | } 129 | 130 | // The false_type function is called when our sequence is still too big, and we need to 131 | // run an internal flat_for loop on the child sequence_points 132 | template static inline void next 133 | (std::false_type, functor_types&&... functor_args) 134 | { 135 | flat_for<0, sequence::sequence_count() - 1, flat_sequence>:: 136 | flat_loop(std::forward(functor_args)...); 137 | } 138 | }; --------------------------------------------------------------------------------