├── .github
    └── workflows
    │   └── ccpp.yml
├── .gitignore
├── .gitlab-ci.yml
├── CMakeLists.txt
├── FindMIPP.cmake
├── LICENSE
├── README.md
├── TODO.md
├── ci
    ├── analysis-cppcheck.sh
    ├── build-coverage-linux-armv7-gcc.sh
    ├── build-coverage-linux-armv8-gcc.sh
    ├── build-coverage-linux-x86-gcc.sh
    ├── build-linux-armv7-clang.sh
    ├── build-linux-armv7-gcc.sh
    ├── build-linux-armv8-clang.sh
    ├── build-linux-armv8-gcc.sh
    ├── build-linux-x86-clang.sh
    ├── build-linux-x86-gcc.sh
    ├── build-linux-x86-icpc.sh
    ├── build-macos-x86-clang.sh
    ├── build-windows-x86-gcc.bat
    ├── build-windows-x86-msvc.bat
    ├── coverage-linux.sh
    ├── test-linux-intel-sde-unit.sh
    ├── test-linux-macos-unit.sh
    ├── test-windows-unit.bat
    └── tools
    │   ├── build-linux-x86-gcc-generic.sh
    │   ├── threads.bat
    │   └── threads.sh
├── cmake
    ├── MIPPConfig.cmake.in
    └── cmake_uninstall.cmake.in
├── codegen
    ├── gen_compress.py
    ├── mipp_LUT.cpp.j2
    ├── requirements.txt
    └── vcompress-LUT.cpp.j2
├── examples
    ├── conversion.cpp
    ├── gemm.cpp
    ├── initreg.cpp
    ├── mask.cpp
    ├── mathfun.cpp
    ├── operator.cpp
    ├── reduction.cpp
    ├── sort
    │   ├── LC_sorter.hpp
    │   ├── LC_sorter_simd.hpp
    │   └── Quick_sorter.hpp
    └── sorting.cpp
├── include
    ├── math
    │   ├── avx512_mathfun.h
    │   ├── avx512_mathfun.hxx
    │   ├── avx_mathfun.h
    │   ├── avx_mathfun.hxx
    │   ├── neon_mathfun.h
    │   ├── neon_mathfun.hxx
    │   ├── sse_mathfun.h
    │   └── sse_mathfun.hxx
    ├── mipp.h
    ├── mipp_impl_AVX.hxx
    ├── mipp_impl_AVX512.hxx
    ├── mipp_impl_NEON.hxx
    ├── mipp_impl_SSE.hxx
    ├── mipp_impl_SVE.hxx
    ├── mipp_object.hxx
    ├── mipp_scalar_op.h
    └── mipp_scalar_op.hxx
├── mipp.jpg
└── tests
    ├── lib
        └── Catch2
        │   ├── README.md
        │   └── include
        │       └── catch.hpp
    └── src
        ├── arithmetic_operations
            ├── abs.cpp
            ├── add.cpp
            ├── cdiv.cpp
            ├── cmul.cpp
            ├── cmulconj.cpp
            ├── conj.cpp
            ├── cvt.cpp
            ├── div.cpp
            ├── div2.cpp
            ├── div4.cpp
            ├── fmadd.cpp
            ├── fmsub.cpp
            ├── fnmadd.cpp
            ├── fnmsub.cpp
            ├── max.cpp
            ├── min.cpp
            ├── mul.cpp
            ├── neg.cpp
            ├── norm.cpp
            ├── pack.cpp
            ├── round.cpp
            ├── rsqrt.cpp
            ├── sat.cpp
            ├── sign.cpp
            ├── sqrt.cpp
            ├── sub.cpp
            └── trunc.cpp
        ├── bitwise_operations
            ├── andb.cpp
            ├── andnb.cpp
            ├── lshift.cpp
            ├── lshiftr.cpp
            ├── notb.cpp
            ├── orb.cpp
            ├── rshift.cpp
            ├── rshiftr.cpp
            └── xorb.cpp
        ├── logical_comparisons
            ├── cmpeq.cpp
            ├── cmpge.cpp
            ├── cmpgt.cpp
            ├── cmple.cpp
            ├── cmplt.cpp
            └── cmpneq.cpp
        ├── main.cpp
        ├── math_functions
            ├── cos.cpp
            ├── exp.cpp
            ├── log.cpp
            ├── sin.cpp
            └── sincos.cpp
        ├── memory_operations
            ├── blend.cpp
            ├── combine.cpp
            ├── compress.cpp
            ├── deinterleave.cpp
            ├── gather.cpp
            ├── get.cpp
            ├── getfirst.cpp
            ├── high.cpp
            ├── interleave.cpp
            ├── interleavehi.cpp
            ├── interleavelo.cpp
            ├── load.cpp
            ├── low.cpp
            ├── lrot.cpp
            ├── rrot.cpp
            ├── scatter.cpp
            ├── set.cpp
            ├── shuff.cpp
            └── store.cpp
        ├── reductions
            ├── hadd.cpp
            ├── hmax.cpp
            ├── hmin.cpp
            ├── hmul.cpp
            └── testz.cpp
        ├── static_for.hpp
        └── static_for.hxx


/.gitignore:
--------------------------------------------------------------------------------
 1 | .gitignore
 2 | .cproject
 3 | .project
 4 | cppcheck/
 5 | examples/a.out
 6 | htdocs/
 7 | .settings/
 8 | src_files.txt
 9 | tests/build*
10 | tests/code_coverage_files/
11 | tests/code_coverage_report/
12 | src/gen/*
13 | 
14 | build*
15 | 


--------------------------------------------------------------------------------
/FindMIPP.cmake:
--------------------------------------------------------------------------------
 1 | # Find module for MIPP: searches for `mipp.h` and, when it is found, exposes
 2 | # the INTERFACE library target `MIPP`.
 3 | #
 4 | # Inputs : MIPP_ROOT (optional hint); $ENV{HOME}/.local is always used as hint
 5 | # Outputs: MIPP_FOUND, MIPP_INCLUDE_DIRS (advanced cache entry), target MIPP
 6 | 
 7 | # MIPP_INCLUDE_DIRS already set (e.g. by an earlier run): silence the report.
 8 | if(MIPP_INCLUDE_DIRS)
 9 |   set(MIPP_FIND_QUIETLY TRUE)
10 | endif()
11 | 
12 | find_path(MIPP_INCLUDE_DIRS NAMES mipp.h
13 |     HINTS
14 |         ${MIPP_ROOT}
15 |         $ENV{HOME}/.local
16 |     PATH_SUFFIXES include/mipp
17 | )
18 | 
19 | include(FindPackageHandleStandardArgs)
20 | find_package_handle_standard_args(MIPP DEFAULT_MSG MIPP_INCLUDE_DIRS)
21 | 
22 | if(NOT MIPP_FOUND)
23 |     message(WARNING "MIPP not found.")
24 | elseif(NOT TARGET MIPP)
25 |     # `NOT TARGET` guard: a second inclusion of this module must neither try to
26 |     # re-create the library nor report MIPP as missing.
27 |     message(STATUS "MIPP_FOUND -> creating interface library MIPP at ${MIPP_INCLUDE_DIRS}")
28 |     add_library(MIPP INTERFACE)
29 |     target_compile_definitions(MIPP INTERFACE HAVE_MIPP=1)
30 |     # Quoted so that a path containing spaces stays a single argument.
31 |     target_include_directories(MIPP INTERFACE "${MIPP_INCLUDE_DIRS}")
32 |     target_compile_features(MIPP INTERFACE cxx_std_11)
33 | endif()
34 | 
35 | mark_as_advanced(MIPP_INCLUDE_DIRS)


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017-2025 MIPP
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.


--------------------------------------------------------------------------------
/TODO.md:
--------------------------------------------------------------------------------
 1 | # TODO
 2 | 
 3 | - [ ] Add left operator functions (ex: "5 + mipp::Reg" should work then!)
 4 | - [ ] Fix and improve GitHub actions
 5 | - [ ] Create a docker image with "Intel Software Development Emulator" to enable 
 6 |       AVX-512 instruction emulation on the runners that do not support
 7 |       native AVX-512
 8 | - [ ] Find a workaround for the 16-bit SSE `compress` that requires BMI2 
 9 |       extension (remove `_pext_u32` dependency, available since Haswell)
10 | - [x] Add SVE to the code coverage
11 | - [x] Compile the examples in the `CMakeLists.txt`
12 | - [x] Add `compress` for NEONv1 with emulation of `vqtbl1q` based on two `vtbl2`
13 | - [x] Improve NEONv2 `shuff` operations with `vqtbl1q` instruction


--------------------------------------------------------------------------------
/ci/analysis-cppcheck.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -x
 3 | 
 4 | cppcheck --version
 5 | 
 6 | mkdir cppcheck
 7 | #cppcheck --suppress=missingIncludeSystem -I./src/ --force --enable=all --std=c++11 -U_MSC_VER ./src/ 2> cppcheck_all.log
 8 | find .\/src\/ -type f -follow -print | grep "[.]h$\|[.]hpp$\|[.]hxx$\|[.]cpp$" > src_files.txt
 9 | cppcheck --language=c++ --suppress=missingIncludeSystem --force --enable=all --std=c++11 -U_MSC_VER --file-list=src_files.txt 2> cppcheck/cppcheck_all.log
10 | cat cppcheck/cppcheck_all.log | grep "error:"          > cppcheck/cppcheck_error.log
11 | cat cppcheck/cppcheck_all.log | grep "warning:"        > cppcheck/cppcheck_warning.log
12 | cat cppcheck/cppcheck_all.log | grep "performance:"    > cppcheck/cppcheck_performance.log
13 | cat cppcheck/cppcheck_all.log | grep "style:"          > cppcheck/cppcheck_style.log
14 | cat cppcheck/cppcheck_all.log | grep "portability:"    > cppcheck/cppcheck_portability.log
15 | cat cppcheck/cppcheck_all.log | grep "information:"    > cppcheck/cppcheck_information.log
16 | cat cppcheck/cppcheck_all.log | grep "unusedFunction:" > cppcheck/cppcheck_unusedFunction.log
17 | cat cppcheck/cppcheck_all.log | grep "missingInclude:" > cppcheck/cppcheck_missingInclude.log
18 | 
19 | COUNT=$(wc -l < cppcheck/cppcheck_error.log )
20 | 
21 | if [ $COUNT -gt 1 ]; then
22 | 	echo "Error count is $COUNT! cppcheck run failed :-(.";
23 | 	echo ""
24 | 	echo "Errors list:"
25 | 	cat cppcheck/cppcheck_error.log
26 | 	exit 1;
27 | elif [ $COUNT -gt 0 ]; then
28 | 	echo "There is one false positive error.";
29 | 	echo ""
30 | 	echo "Errors list:"
31 | 	cat cppcheck/cppcheck_error.log
32 | else
33 | 	echo "There is no error :-)."
34 | fi
35 | 
36 | exit 0;


--------------------------------------------------------------------------------
/ci/build-coverage-linux-armv7-gcc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -x
 3 | 
 4 | WD=$(pwd)
 5 | 
 6 | function gen_coverage_info {
 7 | 	build=$1
 8 | 	mkdir $build
 9 | 	cd $build
10 | 	cmake ../.. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS_DEBUG="-g -O0" -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions --coverage $2" -DCMAKE_EXE_LINKER_FLAGS="--coverage" -DMIPP_STATIC_LIB=ON
11 | 	rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
12 | 	make -j $THREADS
13 | 	rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
14 | 	./bin/run-tests
15 | 	cd ..
16 | 	# rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
17 | 	lcov --capture --directory $build/CMakeFiles/tests_exe.dir/tests/src/ --output-file code_coverage_files/$build.info
18 | 	# rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
19 | 	lcov --remove code_coverage_files/$build.info "*/usr*" "*lib/*" "*/tests/src*" --output-file code_coverage_files/$build.info
20 | 	# rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
21 | 	if [[ -s code_coverage_files/$build.info ]]
22 | 	then
23 | 		sed -i -e "s#${WD}#\.\.#g" code_coverage_files/$build.info
24 | 	else
25 | 		rm code_coverage_files/$build.info
26 | 	fi
27 | }
28 | 
29 | python3 codegen/gen_compress.py
30 | 
31 | cd tests
32 | mkdir code_coverage_files || true
33 | 
34 | build_root=build_coverage_linux_armv7_gcc
35 | gen_coverage_info "${build_root}_nointr"   "-DMIPP_NO_INTRINSICS"
36 | gen_coverage_info "${build_root}_neon"     "-march=armv7-a -mfpu=neon"
37 | gen_coverage_info "${build_root}_neon_fma" "-march=armv7-a -mfpu=neon-vfpv4"
38 | 


--------------------------------------------------------------------------------
/ci/build-coverage-linux-armv8-gcc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -x
 3 | 
 4 | WD=$(pwd)
 5 | 
 6 | function gen_coverage_info {
 7 | 	build=$1
 8 | 	mkdir $build
 9 | 	cd $build
10 | 	cmake ../.. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS_DEBUG="-g -O0" -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions --coverage $2" -DCMAKE_EXE_LINKER_FLAGS="--coverage" -DMIPP_STATIC_LIB=ON
11 | 	rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
12 | 	make -j $THREADS
13 | 	rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
14 | 	if [[ $3 == native ]]; then
15 | 		# execute the tests natively
16 | 		./bin/run-tests
17 | 	else
18 | 		# use Arm Instruction Emulator (ArmIE) to execute the tests
19 | 		source /usr/share/modules/init/profile.sh
20 | 		module load armie22/22.0
21 | 		nbits=$(echo $build | grep -Eo '[0-9]+(\.[0-9]+)?' | tail -n 1)
22 | 		if [ -z "$nbits" ]
23 | 		then
24 | 			echo "The build name is incompatible with SVE build, it should contain the SIMD size (current wrong build name is '$build', an example of expected build name is: 'build_coverage_linux_armv8_gcc_sve_ls256')."
25 | 			exit 1
26 | 		fi
27 | 		armie -msve-vector-bits=$nbits -- ./bin/run-tests
28 | 		#qemu-aarch64 -cpu max,sve-default-vector-length=$nbits ./bin/run-tests
29 | 	fi
30 | 	cd ..
31 | 	# rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
32 | 	lcov --capture --directory $build/CMakeFiles/tests_exe.dir/tests/src/ --output-file code_coverage_files/$build.info
33 | 	# rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
34 | 	lcov --remove code_coverage_files/$build.info "*/usr*" "*lib/*" "*/tests/src*" --output-file code_coverage_files/$build.info
35 | 	# rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
36 | 	if [[ -s code_coverage_files/$build.info ]]
37 | 	then
38 | 		sed -i -e "s#${WD}#\.\.#g" code_coverage_files/$build.info
39 | 	else
40 | 		rm code_coverage_files/$build.info
41 | 	fi
42 | }
43 | 
44 | python3 codegen/gen_compress.py
45 | 
46 | cd tests
47 | mkdir code_coverage_files || true
48 | 
49 | build_root=build_coverage_linux_armv8_gcc
50 | gen_coverage_info "${build_root}_nointr"     "-DMIPP_NO_INTRINSICS"                      "native"
51 | gen_coverage_info "${build_root}_neon"       "-march=armv8.1-a+simd"                     "native"
52 | gen_coverage_info "${build_root}_sve_ls128"  "-march=armv8-a+sve -msve-vector-bits=128"  "armie"
53 | gen_coverage_info "${build_root}_sve_ls256"  "-march=armv8-a+sve -msve-vector-bits=256"  "armie"
54 | gen_coverage_info "${build_root}_sve_ls512"  "-march=armv8-a+sve -msve-vector-bits=512"  "armie"
55 | gen_coverage_info "${build_root}_sve_ls1024" "-march=armv8-a+sve -msve-vector-bits=1024" "armie"
56 | 


--------------------------------------------------------------------------------
/ci/build-coverage-linux-x86-gcc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -x
 3 | 
 4 | WD=$(pwd)
 5 | 
 6 | # install Intel SDE emulator --------------------------------------------------
 7 | apt update
 8 | apt install -y wget xz-utils
 9 | echo 0 > /proc/sys/kernel/yama/ptrace_scope
10 | mkdir softwares
11 | cd softwares
12 | wget https://largo.lip6.fr/monolithe/downloads/sde-external-9.33.0-2024-01-07-lin.tar.xz
13 | tar -xvvf sde-external-9.33.0-2024-01-07-lin.tar.xz
14 | ln -s $WD/softwares/sde-external-9.33.0-2024-01-07-lin $WD/softwares/sde
15 | export PATH=$WD/softwares/sde:$PATH
16 | cd ..
17 | # -----------------------------------------------------------------------------
18 | 
19 | function gen_coverage_info {
20 | 	build=$1
21 | 	mkdir $build
22 | 	cd $build
23 | 	cmake ../.. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS_DEBUG="-g -O0" -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions --coverage $2" -DCMAKE_EXE_LINKER_FLAGS="--coverage" -DMIPP_STATIC_LIB=ON
24 | 	rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
25 | 	make -j $THREADS
26 | 	rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
27 | 	if [[ $3 == native ]]; then
28 | 		# execute the tests natively
29 | 		./bin/run-tests
30 | 	else
31 | 		# use the Intel SDE emulator to execute the tests
32 | 		sde64 $3 -- ./bin/run-tests
33 | 	fi
34 | 	cd ..
35 | 	# rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
36 | 	lcov --capture --directory $build/CMakeFiles/tests_exe.dir/tests/src/ --output-file code_coverage_files/$build.info
37 | 	# rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
38 | 	lcov --remove code_coverage_files/$build.info "*/usr*" "*lib/*" "*/tests/src*" --output-file code_coverage_files/$build.info
39 | 	# rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
40 | 	if [[ -s code_coverage_files/$build.info ]]
41 | 	then
42 | 		sed -i -e "s#${WD}#\.\.#g" code_coverage_files/$build.info
43 | 	else
44 | 		rm code_coverage_files/$build.info
45 | 	fi
46 | }
47 | 
48 | python3 codegen/gen_compress.py
49 | 
50 | cd tests
51 | mkdir code_coverage_files || true
52 | 
53 | build_root=build_coverage_linux_x86_gcc
54 | gen_coverage_info "${build_root}_nointr"        "-DMIPP_NO_INTRINSICS"                            "native"
55 | gen_coverage_info "${build_root}_sse2"          "-msse2"                                          "native"
56 | gen_coverage_info "${build_root}_sse3"          "-msse3"                                          "native"
57 | gen_coverage_info "${build_root}_ssse3"         "-mssse3"                                         "native"
58 | gen_coverage_info "${build_root}_sse4_1"        "-msse4.1"                                        "native"
59 | gen_coverage_info "${build_root}_sse4_2"        "-msse4.2"                                        "native"
60 | gen_coverage_info "${build_root}_avx"           "-mavx"                                           "native"
61 | gen_coverage_info "${build_root}_avx2"          "-mavx2"                                          "native"
62 | gen_coverage_info "${build_root}_avx2_bmi2"     "-mavx2 -mbmi2"                                   "native"
63 | gen_coverage_info "${build_root}_avx2_bmi2_fma" "-mavx2 -mbmi2 -mfma"                             "native"
64 | gen_coverage_info "${build_root}_avx512f"       "-mavx512f"                                       "-skx" # skylake server architecture emulation
65 | gen_coverage_info "${build_root}_avx512bw"      "-mavx512f -mavx512bw"                            "-skx" # skylake server architecture emulation
66 | gen_coverage_info "${build_root}_avx512vbmi2"   "-mavx512f -mavx512bw -mavx512vbmi -mavx512vbmi2" "-spr" # sapphire rapid architecture emulation
67 | 


--------------------------------------------------------------------------------
/ci/build-linux-armv7-clang.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # CI build (Linux, ARMv7, clang++): one Release build tree per flag set listed
 3 | # at the bottom of the script.
 4 | set -x
 5 | 
 6 | # compile <build_dir> <extra_cxx_flags>
 7 | # Configure and build the project inside <build_dir>; the script exits with
 8 | # the status of cmake or make as soon as one of them fails.
 9 | compile() {
10 | 	local build_dir=$1
11 | 	mkdir "$build_dir"
12 | 	cd "$build_dir"
13 | 	cmake .. -G"Unix Makefiles" \
14 | 		-DCMAKE_CXX_COMPILER=clang++ \
15 | 		-DCMAKE_BUILD_TYPE=Release \
16 | 		-DCMAKE_CXX_FLAGS="-Wall -funroll-loops $2" \
17 | 		-DMIPP_STATIC_LIB=ON || exit $?
18 | 	make -j $THREADS || exit $?
19 | 	cd ..
20 | }
21 | 
22 | # Python requirements of the code generator, then the generator itself.
23 | apt update
24 | apt install -y python3-pip
25 | pip3 install --user -r codegen/requirements.txt
26 | python3 codegen/gen_compress.py
27 | 
28 | prefix=build_linux_armv7_clang
29 | compile "${prefix}_nointr"   "-DMIPP_NO_INTRINSICS"
30 | compile "${prefix}_neon"     "-march=armv7-a -mfpu=neon"
31 | compile "${prefix}_neon_fma" "-march=armv7-a -mfpu=neon-vfpv4"


--------------------------------------------------------------------------------
/ci/build-linux-armv7-gcc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # CI build (Linux, ARMv7, g++): one Release build tree per flag set listed at
 3 | # the bottom of the script.
 4 | set -x
 5 | 
 6 | # compile <build_dir> <extra_cxx_flags>
 7 | # Configure and build the project inside <build_dir>; the script exits with
 8 | # the status of cmake or make as soon as one of them fails.
 9 | compile() {
10 | 	local build_dir=$1
11 | 	mkdir "$build_dir"
12 | 	cd "$build_dir"
13 | 	cmake .. -G"Unix Makefiles" \
14 | 		-DCMAKE_CXX_COMPILER=g++ \
15 | 		-DCMAKE_BUILD_TYPE=Release \
16 | 		-DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions $2" \
17 | 		-DMIPP_STATIC_LIB=ON || exit $?
18 | 	make -j $THREADS || exit $?
19 | 	cd ..
20 | }
21 | 
22 | # Python requirements of the code generator, then the generator itself.
23 | apt update
24 | apt install -y python3-pip
25 | pip3 install --user -r codegen/requirements.txt
26 | python3 codegen/gen_compress.py
27 | 
28 | prefix=build_linux_armv7_gcc
29 | compile "${prefix}_nointr"   "-DMIPP_NO_INTRINSICS"
30 | compile "${prefix}_neon"     "-march=armv7-a -mfpu=neon"
31 | compile "${prefix}_neon_fma" "-march=armv7-a -mfpu=neon-vfpv4"


--------------------------------------------------------------------------------
/ci/build-linux-armv8-clang.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # CI build (Linux, ARMv8, clang++): one Release build tree per flag set listed
 3 | # at the bottom of the script.
 4 | set -x
 5 | 
 6 | # compile <build_dir> <extra_cxx_flags>
 7 | # Configure and build the project inside <build_dir>; the script exits with
 8 | # the status of cmake or make as soon as one of them fails.
 9 | compile() {
10 | 	local build_dir=$1
11 | 	mkdir "$build_dir"
12 | 	cd "$build_dir"
13 | 	cmake .. -G"Unix Makefiles" \
14 | 		-DCMAKE_CXX_COMPILER=clang++ \
15 | 		-DCMAKE_BUILD_TYPE=Release \
16 | 		-DCMAKE_CXX_FLAGS="-Wall -funroll-loops $2" \
17 | 		-DMIPP_STATIC_LIB=ON || exit $?
18 | 	make -j $THREADS || exit $?
19 | 	cd ..
20 | }
21 | 
22 | # Python requirements of the code generator, then the generator itself.
23 | apt update
24 | apt install -y python3-pip
25 | pip3 install --user -r codegen/requirements.txt
26 | python3 codegen/gen_compress.py
27 | 
28 | prefix=build_linux_armv8_clang
29 | compile "${prefix}_nointr" "-DMIPP_NO_INTRINSICS"
30 | compile "${prefix}_neon"   "-march=armv8.1-a+simd"


--------------------------------------------------------------------------------
/ci/build-linux-armv8-gcc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # CI build (Linux, ARMv8, g++): one Release build tree per flag set listed at
 3 | # the bottom of the script (no intrinsics, NEON, SVE with fixed vector sizes).
 4 | set -x
 5 | 
 6 | # compile <build_dir> <extra_cxx_flags>
 7 | # Configure and build the project inside <build_dir>; the script exits with
 8 | # the status of cmake or make as soon as one of them fails.
 9 | compile() {
10 | 	local build_dir=$1
11 | 	mkdir "$build_dir"
12 | 	cd "$build_dir"
13 | 	cmake .. -G"Unix Makefiles" \
14 | 		-DCMAKE_CXX_COMPILER=g++ \
15 | 		-DCMAKE_BUILD_TYPE=Release \
16 | 		-DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions $2" \
17 | 		-DMIPP_STATIC_LIB=ON || exit $?
18 | 	make -j $THREADS || exit $?
19 | 	cd ..
20 | }
21 | 
22 | # Python requirements of the code generator, then the generator itself.
23 | apt update
24 | apt install -y python3-pip
25 | pip3 install --user -r codegen/requirements.txt
26 | python3 codegen/gen_compress.py
27 | 
28 | prefix=build_linux_armv8_gcc
29 | compile "${prefix}_nointr"     "-DMIPP_NO_INTRINSICS"
30 | compile "${prefix}_neon"       "-march=armv8.1-a+simd"
31 | compile "${prefix}_sve_ls128"  "-march=armv8-a+sve -msve-vector-bits=128"
32 | compile "${prefix}_sve_ls256"  "-march=armv8-a+sve -msve-vector-bits=256"
33 | compile "${prefix}_sve_ls512"  "-march=armv8-a+sve -msve-vector-bits=512"
34 | compile "${prefix}_sve_ls1024" "-march=armv8-a+sve -msve-vector-bits=1024"


--------------------------------------------------------------------------------
/ci/build-linux-x86-clang.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # CI build (Linux, x86, clang++): one Release build tree per flag set listed
 3 | # at the bottom of the script.
 4 | set -x
 5 | 
 6 | # compile <build_dir> <extra_cxx_flags>
 7 | # Configure and build the project inside <build_dir>; the script exits with
 8 | # the status of cmake or make as soon as one of them fails.
 9 | function compile {
10 | 	build=$1
11 | 	mkdir $build
12 | 	cd $build
13 | 	# CMAKE_EXE_LINKER_FLAGS is defined exactly once: when -D is repeated for
14 | 	# the same variable CMake keeps the last value, so a static-link definition
15 | 	# placed before "-pie" would be silently ignored.
16 | 	# NOTE(review): if fully static executables are wanted here (as in the g++
17 | 	# script), replace "-pie" by the static flags instead of adding a second -D.
18 | 	cmake .. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-Wall -funroll-loops $2" -DCMAKE_EXE_LINKER_FLAGS="-pie" -DMIPP_STATIC_LIB=ON
19 | 	rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
20 | 	make -j $THREADS
21 | 	rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
22 | 	cd ..
23 | }
24 | 
25 | # Python requirements of the code generator, then the generator itself.
26 | apt update
27 | apt install -y python3-pip
28 | pip3 install --user -r codegen/requirements.txt
29 | python3 codegen/gen_compress.py
30 | 
31 | build_root=build_linux_x86_clang
32 | compile "${build_root}_nointr"        "-DMIPP_NO_INTRINSICS"
33 | compile "${build_root}_sse2"          "-msse2"
34 | compile "${build_root}_sse3"          "-msse3"
35 | compile "${build_root}_ssse3"         "-mssse3"
36 | compile "${build_root}_sse4_1"        "-msse4.1"
37 | compile "${build_root}_sse4_2"        "-msse4.2"
38 | compile "${build_root}_avx"           "-mavx"
39 | compile "${build_root}_avx2"          "-mavx2"
40 | compile "${build_root}_avx2_bmi2"     "-mavx2 -mbmi2"
41 | compile "${build_root}_avx2_bmi2_fma" "-mavx2 -mbmi2 -mfma"
42 | compile "${build_root}_avx512f"       "-mavx512f"
43 | compile "${build_root}_avx512bw"      "-mavx512f -mavx512bw"
44 | compile "${build_root}_avx512vbmi2"   "-mavx512f -mavx512bw -mavx512vbmi -mavx512vbmi2"


--------------------------------------------------------------------------------
/ci/build-linux-x86-gcc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # CI build (Linux, x86, g++): one statically linked Release build tree per
 3 | # flag set listed at the bottom of the script.
 4 | set -x
 5 | 
 6 | # compile <build_dir> <extra_cxx_flags>
 7 | # Configure and build the project inside <build_dir>; the script exits with
 8 | # the status of cmake or make as soon as one of them fails.
 9 | compile() {
10 | 	local build_dir=$1
11 | 	mkdir "$build_dir"
12 | 	cd "$build_dir"
13 | 	cmake .. -G"Unix Makefiles" \
14 | 		-DCMAKE_CXX_COMPILER=g++ \
15 | 		-DCMAKE_BUILD_TYPE=Release \
16 | 		-DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions $2" \
17 | 		-DCMAKE_EXE_LINKER_FLAGS="-static -static-libgcc -static-libstdc++" \
18 | 		-DMIPP_STATIC_LIB=ON || exit $?
19 | 	make -j $THREADS || exit $?
20 | 	cd ..
21 | }
22 | 
23 | # Python requirements of the code generator, then the generator itself.
24 | apt update
25 | apt install -y python3-pip
26 | pip3 install --user -r codegen/requirements.txt
27 | python3 codegen/gen_compress.py
28 | 
29 | prefix=build_linux_x86_gcc
30 | compile "${prefix}_nointr"        "-DMIPP_NO_INTRINSICS"
31 | compile "${prefix}_sse2"          "-msse2"
32 | compile "${prefix}_sse3"          "-msse3"
33 | compile "${prefix}_ssse3"         "-mssse3"
34 | compile "${prefix}_sse4_1"        "-msse4.1"
35 | compile "${prefix}_sse4_2"        "-msse4.2"
36 | compile "${prefix}_avx"           "-mavx"
37 | compile "${prefix}_avx2"          "-mavx2"
38 | compile "${prefix}_avx2_bmi2"     "-mavx2 -mbmi2"
39 | compile "${prefix}_avx2_bmi2_fma" "-mavx2 -mbmi2 -mfma"
40 | compile "${prefix}_avx512f"       "-mavx512f"
41 | compile "${prefix}_avx512bw"      "-mavx512f -mavx512bw"
42 | compile "${prefix}_avx512vbmi2"   "-mavx512f -mavx512bw -mavx512vbmi -mavx512vbmi2"


--------------------------------------------------------------------------------
/ci/build-linux-x86-icpc.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # CI build (Linux, x86, Intel icpc): one statically linked Release build tree
 3 | # per flag set listed at the bottom of the script.
 4 | set -x
 5 | 
 6 | # compile <build_dir> <extra_cxx_flags>
 7 | # Configure and build the project inside <build_dir>; the script exits with
 8 | # the status of cmake or make as soon as one of them fails.
 9 | compile() {
10 | 	local build_dir=$1
11 | 	mkdir "$build_dir"
12 | 	cd "$build_dir"
13 | 	cmake .. -G"Unix Makefiles" \
14 | 		-DCMAKE_CXX_COMPILER=icpc \
15 | 		-DCMAKE_BUILD_TYPE=Release \
16 | 		-DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions -std=c++11 $2" \
17 | 		-DCMAKE_EXE_LINKER_FLAGS="-static -static-libgcc -static-libstdc++" \
18 | 		-DMIPP_STATIC_LIB=ON || exit $?
19 | 	make -j $THREADS || exit $?
20 | 	cd ..
21 | }
22 | 
23 | # Python requirements of the code generator, then the generator itself.
24 | pip3 install --user -r codegen/requirements.txt
25 | python3 codegen/gen_compress.py
26 | 
27 | # Intel environment script, sourced before the first build.
28 | source /opt/intel/vars-intel.sh
29 | 
30 | prefix=build_linux_x86_icpc
31 | compile "${prefix}_nointr"    "-DMIPP_NO_INTRINSICS"
32 | compile "${prefix}_sse2"      "-msse2"
33 | compile "${prefix}_sse3"      "-msse3"
34 | compile "${prefix}_ssse3"     "-mssse3"
35 | compile "${prefix}_sse4_1"    "-msse4.1"
36 | compile "${prefix}_sse4_2"    "-msse4.2"
37 | compile "${prefix}_avx"       "-mavx"
38 | compile "${prefix}_avx2_fma"  "-march=core-avx2 -fma"
39 | # Disabled build: compile "${prefix}_avx512mic" "-xMIC-AVX512"
40 | compile "${prefix}_avx512f"   "-xCOMMON-AVX512"
41 | compile "${prefix}_avx512bw"  "-xCORE-AVX512"


--------------------------------------------------------------------------------
/ci/build-macos-x86-clang.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # CI build (macOS, x86, clang++): one Release build tree per flag set listed
 3 | # at the bottom of the script.
 4 | set -x
 5 | 
 6 | # compile <build_dir> <extra_cxx_flags>
 7 | # Configure and build the project inside <build_dir>; the script exits with
 8 | # the status of cmake or make as soon as one of them fails.
 9 | function compile {
10 | 	build=$1
11 | 	mkdir $build
12 | 	cd $build
13 | 	cmake .. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions $2" -DMIPP_STATIC_LIB=ON
14 | 	rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
15 | 	make
16 | 	rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
17 | 	cd ..
18 | }
19 | 
20 | # apt is a Debian/Ubuntu tool and is not available on a stock macOS runner:
21 | # only use it to install pip when it actually exists.
22 | if command -v apt > /dev/null 2>&1; then
23 | 	apt update
24 | 	apt install -y python3-pip
25 | fi
26 | pip3 install --user -r codegen/requirements.txt
27 | python3 codegen/gen_compress.py
28 | 
29 | build_root=build_macos_x86_clang
30 | compile "${build_root}_nointr"   "-DMIPP_NO_INTRINSICS"
31 | compile "${build_root}_sse2"     "-msse2"
32 | compile "${build_root}_sse3"     "-msse3"
33 | compile "${build_root}_ssse3"    "-mssse3"
34 | compile "${build_root}_sse4_1"   "-msse4.1"
35 | compile "${build_root}_sse4_2"   "-msse4.2"
36 | compile "${build_root}_avx"      "-mavx"
37 | compile "${build_root}_avx2"     "-mavx2"
38 | compile "${build_root}_avx2_fma" "-mavx2 -mfma"


--------------------------------------------------------------------------------
/ci/build-windows-x86-gcc.bat:
--------------------------------------------------------------------------------
 1 | @echo on
 2 | rem CI build for Windows with MinGW g++: one Release build tree per flag set
 3 | rem passed to the compile routine below.
 4 | 
 5 | rem NOTE(review): threads.bat is not visible here - presumably it defines the
 6 | rem THREADS variable used by the compile routine; confirm.
 7 | call ./ci/tools/threads.bat
 8 | 
 9 | rem set build_root=build_windows_x86_gcc
10 | call :compile "build_windows_x86_gcc_nointr"   "-Wno-attributes -DMIPP_NO_INTRINSICS"
11 | call :compile "build_windows_x86_gcc_sse2"     "-Wno-attributes -msse2"
12 | call :compile "build_windows_x86_gcc_sse3"     "-Wno-attributes -msse3"
13 | call :compile "build_windows_x86_gcc_ssse3"    "-Wno-attributes -mssse3"
14 | call :compile "build_windows_x86_gcc_sse4_1"   "-Wno-attributes -msse4.1"
15 | call :compile "build_windows_x86_gcc_sse4_2"   "-Wno-attributes -msse4.2"
16 | call :compile "build_windows_x86_gcc_avx"      "-Wno-attributes -mavx"
17 | call :compile "build_windows_x86_gcc_avx2"     "-Wno-attributes -mavx2"
18 | call :compile "build_windows_x86_gcc_avx2_fma" "-Wno-attributes -mavx2 -mfma"
19 | rem call :compile "build_windows_x86_gcc_avx512f"  "-mavx512f"
20 | 
21 | exit /B %ERRORLEVEL%
22 | 
23 | rem compile routine. Argument 1: build directory name. Argument 2: extra C++
24 | rem flags appended to CMAKE_CXX_FLAGS. A failing cmake or mingw32-make call
25 | rem ends the whole script with that error level.
26 | :compile
27 | set build=%~1%
28 | set params=%~2%
29 | mkdir %build%
30 | cd %build%
31 | cmake .. -G"MinGW Makefiles" -DCMAKE_CXX_COMPILER=g++.exe -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions %params%" -DMIPP_STATIC_LIB=OFF
32 | if %ERRORLEVEL% neq 0 exit %ERRORLEVEL%
33 | mingw32-make -j %THREADS%
34 | if %ERRORLEVEL% neq 0 exit %ERRORLEVEL%
35 | cd ..
36 | exit /B 0


--------------------------------------------------------------------------------
/ci/build-windows-x86-msvc.bat:
--------------------------------------------------------------------------------
 1 | @echo on
 2 | rem CI build for Windows with MSVC: one Release build tree per flag set passed
 3 | rem to the compile routine below.
 4 | 
 5 | rem NOTE(review): threads.bat is not visible here - presumably it defines the
 6 | rem THREADS variable used by the compile routine; confirm.
 7 | call ./ci/tools/threads.bat
 8 | 
 9 | rem Load the 64-bit Visual Studio toolchain environment found under VS_PATH.
10 | rem NOTE(review): VSCMD_START_DIR presumably keeps vcvars64.bat from changing
11 | rem the current directory; confirm against the Visual Studio documentation.
12 | set "VSCMD_START_DIR=%CD%"
13 | call "%VS_PATH%\VC\Auxiliary\Build\vcvars64.bat"
14 | 
15 | rem set build_root=build_windows_x86_msvc
16 | rem Each SSE/AVX build also defines the GCC-style feature macros by hand.
17 | call :compile "build_windows_x86_msvc_nointr"   "-DMIPP_NO_INTRINSICS"
18 | call :compile "build_windows_x86_msvc_sse2"     "/arch:SSE2   -D__SSE__ -D__SSE2__"
19 | call :compile "build_windows_x86_msvc_sse3"     "/arch:SSE3   -D__SSE__ -D__SSE2__ -D__SSE3__"
20 | call :compile "build_windows_x86_msvc_ssse3"    "/arch:SSSE3  -D__SSE__ -D__SSE2__ -D__SSE3__ -D__SSSE3__"
21 | call :compile "build_windows_x86_msvc_sse4_1"   "/arch:SSE4.1 -D__SSE__ -D__SSE2__ -D__SSE3__ -D__SSSE3__ -D__SSE4_1__"
22 | call :compile "build_windows_x86_msvc_sse4_2"   "/arch:SSE4.2 -D__SSE__ -D__SSE2__ -D__SSE3__ -D__SSSE3__ -D__SSE4_1__ -D__SSE4_2__"
23 | call :compile "build_windows_x86_msvc_avx"      "/arch:AVX    -D__SSE__ -D__SSE2__ -D__SSE3__ -D__SSSE3__ -D__SSE4_1__ -D__SSE4_2__ -D__AVX__"
24 | call :compile "build_windows_x86_msvc_avx2"     "/arch:AVX2   -D__SSE__ -D__SSE2__ -D__SSE3__ -D__SSSE3__ -D__SSE4_1__ -D__SSE4_2__ -D__AVX__ -D__AVX2__"
25 | call :compile "build_windows_x86_msvc_avx2_fma" "/arch:AVX2   -D__SSE__ -D__SSE2__ -D__SSE3__ -D__SSSE3__ -D__SSE4_1__ -D__SSE4_2__ -D__AVX__ -D__AVX2__ -D__FMA__"
26 | rem call :compile "build_windows_x86_msvc_avx512f"  "/arch:AVX512"
27 | 
28 | exit /B %ERRORLEVEL%
29 | 
30 | rem compile routine. Argument 1: build directory name. Argument 2: extra C++
31 | rem flags appended to CMAKE_CXX_FLAGS. The examples are switched off for this
32 | rem build. A failing cmake or msbuild call ends the whole script with that
33 | rem error level.
34 | :compile
35 | set build=%~1%
36 | set params=%~2%
37 | mkdir %build%
38 | cd %build%
39 | cmake .. -G"Visual Studio 15 2017 Win64" -DCMAKE_CXX_FLAGS="-D_CRT_SECURE_NO_DEPRECATE /EHsc /MP%THREADS% %params%" -DMIPP_STATIC_LIB=OFF -DMIPP_EXAMPLES_EXE=OFF
40 | if %ERRORLEVEL% neq 0 exit %ERRORLEVEL%
41 | rem devenv /build Release MIPP.sln
42 | msbuild MIPP.sln /t:Build /p:Configuration=Release
43 | if %ERRORLEVEL% neq 0 exit %ERRORLEVEL%
44 | rem Move the built files out of bin\Release so that they sit directly in bin.
45 | move bin\Release\* bin\
46 | rmdir bin\Release\
47 | cd ..
48 | exit /B 0


--------------------------------------------------------------------------------
/ci/coverage-linux.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -x
3 | # Render the coverage files found in tests/code_coverage_files into an HTML report with genhtml.
4 | cd tests || exit 1  # abort instead of generating the report from the wrong directory
5 | mkdir -p code_coverage_report  # succeeds silently when the directory already exists
6 | 
7 | genhtml code_coverage_files/* --output-directory ./code_coverage_report/
8 | 


--------------------------------------------------------------------------------
/ci/test-linux-intel-sde-unit.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -x
 3 | # Run ./bin/run-tests of every build directory given as argument under the Intel SDE emulator.
 4 | if [ -z "$INTEL_SDE_ARCH" ]
 5 | then
 6 | 	echo "Please define the 'INTEL_SDE_ARCH' environment variable (ex.: -skx)."
 7 | 	exit 1
 8 | fi
 9 | 
10 | # install Intel SDE emulator --------------------------------------------------
11 | apt update
12 | apt install -y wget xz-utils
13 | echo 0 > /proc/sys/kernel/yama/ptrace_scope
14 | WD=$(pwd)
15 | SDE=sde-external-9.33.0-2024-01-07-lin
16 | mkdir -p "$WD/softwares" && cd "$WD/softwares" || exit 1
17 | wget "https://largo.lip6.fr/monolithe/downloads/$SDE.tar.xz" || exit 1
18 | tar -xvvf "$SDE.tar.xz" || exit 1
19 | ln -sfn "$WD/softwares/$SDE" "$WD/softwares/sde"
20 | export PATH="$WD/softwares/sde:$PATH"
21 | cd "$WD" || exit 1
22 | # -----------------------------------------------------------------------------
23 | 
24 | for build in "$@"
25 | do
26 | 	cd "$build" || exit 1
27 | 	sde64 $INTEL_SDE_ARCH -- ./bin/run-tests  # left unquoted so several emulator flags can be passed
28 | 	rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
29 | 	cd "$WD" || exit 1  # back to the start directory, also for nested build paths
30 | done
31 | 


--------------------------------------------------------------------------------
/ci/test-linux-macos-unit.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -x
 3 | # Run ./bin/run-tests in each build directory given as argument; builds whose name contains "sve" go through armie.
 4 | for build in "$@"
 5 | do
 6 | 	cd "$build" || exit 1
 7 | 	if [[ "$build" == *"sve"* ]]; then
 8 | 		source /usr/share/modules/init/profile.sh
 9 | 		module load armie22/22.0
10 | 		nbits=$(echo "$build" | grep -Eo '[0-9]+(\.[0-9]+)?' | tail -n 1)  # last number in the build name = SVE vector size
11 | 		if [ -z "$nbits" ]
12 | 		then
13 | 			echo "The build name is incompatible with SVE build, it should contain the SIMD size (current wrong build name is '$build', an example of expected build name is: 'build_coverage_linux_armv8_gcc_sve_ls256')."
14 | 			exit 1
15 | 		fi
16 | 		# armie -msve-vector-bits=$nbits -- ./bin/run_tests 2>&1 | tee sve_$nbits.txt
17 | 		# RES=$(cat sve_$nbits.txt | tail -3 | head -n 1)
18 | 		# if [[ "$RES" != *"test cases:   289 |    92 passed | 197 failed"* ]]; then
19 | 		# 	exit 1;
20 | 		# fi
21 | 		armie -msve-vector-bits="$nbits" -- ./bin/run-tests
22 | 		rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
23 | 	else
24 | 		./bin/run-tests
25 | 		rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
26 | 	fi
27 | 	cd - > /dev/null || exit 1  # return to the previous directory, also for nested build paths
28 | done
29 | 


--------------------------------------------------------------------------------
/ci/test-windows-unit.bat:
--------------------------------------------------------------------------------
 1 | @echo on
 2 | rem Run run-tests.exe from the bin folder of every build directory passed as argument.
 3 | :Loop
 4 | if "%~1"=="" goto End
 5 | 
 6 |    set "build=%~1"
 7 |    cd %build%/bin/
 8 |    run-tests.exe
 9 |    if not "%ERRORLEVEL%"=="0" exit %ERRORLEVEL%
10 |    cd ../../
11 | 
12 | shift
13 | goto Loop
14 | 
15 | :End
16 | 


--------------------------------------------------------------------------------
/ci/tools/build-linux-x86-gcc-generic.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #set -x
 3 | 
 4 | function compile {
 5 | 	# $1: suffix of the build directory name; $2: extra flags appended to CMAKE_CXX_FLAGS.
 6 | 	cd tests || exit 1
 7 | 	build_root=build_linux_x86_gcc
 8 | 	build="${build_root}_$1"
 9 | 	mkdir -p "$build"  # tolerate a build directory left over from a previous run
10 | 	cd "$build" || exit 1
11 | 	cmake .. -G"Unix Makefiles" -DCMAKE_CXX_COMPILER=g++ -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-Wall -funroll-loops -finline-functions $2"
12 | 	rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
13 | 	make -j $THREADS
14 | 	rc=$?; if [[ $rc != 0 ]]; then exit $rc; fi
15 | 	cd ../..
16 | }
17 | 


--------------------------------------------------------------------------------
/ci/tools/threads.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 | rem Keep a caller-provided THREADS; otherwise use NUMBER_OF_PROCESSORS, or 1 when that is empty.
3 | if defined THREADS goto End
4 | set "THREADS=%NUMBER_OF_PROCESSORS%"
5 | if not defined THREADS set "THREADS=1"
6 | 
7 | :End


--------------------------------------------------------------------------------
/ci/tools/threads.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # set -x
 3 | # Export THREADS (the job count passed to "make -j") unless the caller already set it.
 4 | if [ -z "$THREADS" ]; then
 5 | 	if [ -f /proc/cpuinfo ]; then
 6 | 		THREADS=$(grep -c ^processor /proc/cpuinfo)
 7 | 	else
 8 | 		# No /proc/cpuinfo on this host: ask getconf, fall back to a single job.
 9 | 		THREADS=$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)
10 | 	fi
11 | 	export THREADS
12 | fi


--------------------------------------------------------------------------------
/cmake/MIPPConfig.cmake.in:
--------------------------------------------------------------------------------
1 | set(MIPP_VERSION @MIPP_VERSION@) # placeholder replaced when this template is configured
2 | 
3 | @PACKAGE_INIT@
4 | 
5 | set_and_check(MIPP_INC_DIR "@PACKAGE_INC_INSTALL_DIR@") # sets MIPP_INC_DIR; presumably provided by the @PACKAGE_INIT@ expansion above
6 | 
7 | # Add the targets file
8 | include("${CMAKE_CURRENT_LIST_DIR}/mippTargets.cmake")
9 | 


--------------------------------------------------------------------------------
/cmake/cmake_uninstall.cmake.in:
--------------------------------------------------------------------------------
 1 | # http://www.vtk.org/Wiki/CMake_FAQ#Can_I_do_.22make_uninstall.22_with_CMake.3F
 2 | #
 3 | # Uninstall script: removes every path recorded in the install manifest of the
 4 | # build tree. The DESTDIR environment variable is prepended to each path.
 5 | # The @...@ placeholders are filled in when this template is configured.
 6 | 
 7 | set(manifest "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
 8 | if(NOT EXISTS "${manifest}")
 9 |   message(FATAL_ERROR "Cannot find install manifest: \"${manifest}\"")
10 | endif()
11 | 
12 | # The manifest holds one installed path per line.
13 | file(STRINGS "${manifest}" files)
14 | foreach(file ${files})
15 |   set(installed_path "$ENV{DESTDIR}${file}")
16 |   message(STATUS "Uninstalling \"${installed_path}\"")
17 |   # EXISTS is false for a dangling symlink, hence the additional IS_SYMLINK test.
18 |   if(EXISTS "${installed_path}" OR IS_SYMLINK "${installed_path}")
19 |     # execute_process() replaces the deprecated exec_program() (rejected under
20 |     # policy CMP0153); the path is passed as its own argument so that paths
21 |     # containing spaces stay intact.
22 |     execute_process(
23 |       COMMAND "@CMAKE_COMMAND@" -E remove "${installed_path}"
24 |       OUTPUT_VARIABLE rm_out
25 |       RESULT_VARIABLE rm_retval
26 |     )
27 |     if(NOT "${rm_retval}" STREQUAL "0")
28 |       message(FATAL_ERROR "Problem when removing \"${installed_path}\"")
29 |     endif()
30 |   else()
31 |     message(STATUS "File \"${installed_path}\" does not exist.")
32 |   endif()
33 | endforeach()


--------------------------------------------------------------------------------
/codegen/gen_compress.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | 
 3 | from jinja2 import Environment, FileSystemLoader
 4 | from pathlib import Path
 5 | import sys
 6 | 
 7 | if sys.version_info[0] != 3 or sys.version_info[1] < 5:
 8 |     print("This script requires Python version 3.5")
 9 |     sys.exit(1)
10 | 
11 | env = Environment(loader = FileSystemLoader("codegen"))
12 | template_lut = env.get_template("vcompress-LUT.cpp.j2")
13 | template_file = env.get_template("mipp_LUT.cpp.j2")
14 | 
15 | def generate_lut(entries, simdwidth, words_per_simd):  # builds an entries x simdwidth table; row i is derived from the bits of i
16 |     lut = [[0 for j in range(0, simdwidth)] for i in range(0, entries)]
17 | 
18 |     elem_bytes = simdwidth // words_per_simd  # number of LUT slots covered by one SIMD word
19 | 
20 |     for i in range(0, entries):
21 |         mask = i  # bit k of the row index (LSB first) tells whether word k is kept
22 |         j = 0  # write position: first slot after the words kept so far
23 | 
24 |         for k in range(0, words_per_simd):
25 |             for b in range(0, elem_bytes):
26 |                 lut[i][j + b] = k * elem_bytes + b  # slot indices of word k, written at the current position
27 | 
28 |             if mask & 1 == 1:
29 |                 j += elem_bytes  # word kept: advance; otherwise the next word overwrites these slots
30 |             mask >>= 1
31 | 
32 |         for k in range(j, simdwidth):
33 |             lut[i][k] = -1  # every slot after the kept words is marked with -1
34 | 
35 |     return lut  # list of `entries` rows, each a list of `simdwidth` ints
36 | 
37 | def write_all_luts(filename, all_luts):
38 |     all_content = template_file.render(
39 |         luts = all_luts,
40 |     )
41 | 
42 |     with open(filename, "w+") as file:
43 |         file.write(all_content)
44 | 
45 | def generate_luts(filename, simdname, simdwidth, entrydef, lut_params_list):
46 |     (entrytype, entrybytes) = entrydef
47 | 
48 |     all_luts = []
49 |     for entries, simd_words in lut_params_list:
50 | 
51 |         elem_bits = (simdwidth // simd_words) * entrybytes * 8
52 | 
53 |         lut = template_lut.render(
54 |             lutname = f"vcompress_LUT{elem_bits}x{simd_words}_{simdname}",
55 |             entries = entries,
56 |             simdwidth = simdwidth,
57 |             entrytype = entrytype,
58 |             lut = generate_lut(entries, simdwidth, simd_words)
59 |         )
60 | 
61 |         all_luts += [lut]
62 | 
63 |     write_all_luts(filename, all_luts)
64 | 
65 | def generate_AVX_luts(filename):
66 |     lut_AVX_32x8 = template_lut.render(
67 |         lutname = f"vcompress_LUT32x8_AVX",
68 |         entries = 256,
69 |         simdwidth = 8,
70 |         entrytype = "int32_t",
71 |         lut = generate_lut(256, 8, 8)
72 |     )
73 | 
74 |     lut_AVX_64x4 = template_lut.render(
75 |         lutname = "vcompress_LUT64x4_AVX",
76 |         entries = 16,
77 |         simdwidth = 8,
78 |         entrytype = "int32_t",
79 |         lut = generate_lut(16, 8, 4)
80 |     )
81 | 
82 |     all_luts = [lut_AVX_32x8, lut_AVX_64x4]
83 | 
84 |     write_all_luts(filename, all_luts)
85 |     pass
86 | 
87 | Path("src/gen").mkdir(parents=True, exist_ok=True)  # output directory, relative to the working directory
88 | # SSE and NEON are generated with identical parameters; only the name suffix and output file differ.
89 | generate_luts("src/gen/compress_LUT_SSE.cpp",  "SSE",  16, ("int8_t", 1),  [(4, 2), (16, 4), (256, 8), (65536, 16)])
90 | generate_luts("src/gen/compress_LUT_NEON.cpp", "NEON", 16, ("int8_t", 1),  [(4, 2), (16, 4), (256, 8), (65536, 16)])
91 | 
92 | #generate_luts("src/gen/mipp_compress_LUT_AVX.cpp",  "AVX",  8, ("int32_t", 4), [(256, 8)])
93 | generate_AVX_luts("src/gen/compress_LUT_AVX.cpp")
94 | 


--------------------------------------------------------------------------------
/codegen/mipp_LUT.cpp.j2:
--------------------------------------------------------------------------------
 1 | #include <stdint.h>
 2 | 
 3 | namespace mipp {
 4 | 
 5 | {% for lut in luts -%}
 6 |   {{ lut }}
 7 |   
 8 | {% endfor %}
 9 | 
10 | }
11 | 


--------------------------------------------------------------------------------
/codegen/requirements.txt:
--------------------------------------------------------------------------------
1 | jinja2==3.0.0
2 | 


--------------------------------------------------------------------------------
/codegen/vcompress-LUT.cpp.j2:
--------------------------------------------------------------------------------
 1 | alignas(32)
 2 | {{ entrytype }} {{ lutname }}[{{ entries }}][{{ simdwidth }}] = {
 3 | {%- for i in range(0, entries) %}
 4 |   {
 5 |   {%- for j in range(0, simdwidth) -%}
 6 |     {{ "%3d" | format(lut[i][j]) }},
 7 |   {%- endfor -%}
 8 |   },
 9 | {%- endfor %}
10 | };
11 | 


--------------------------------------------------------------------------------
/examples/conversion.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <random>
  3 | #include <algorithm>
  4 | 
  5 | #include <mipp.h>
  6 | // Example: load a register of signed char, then convert its halves step by step to short, int and float, printing each stage.
  7 | int main(int argc, char** argv)
  8 | {
  9 | 	// ------------------------------------------------------------------------
 10 | 	std::cout << "MIPP example" << std::endl;
 11 | 	std::cout << "------------" << std::endl;
 12 | 	std::cout << " - Instr. type:       " << mipp::InstructionType                  << std::endl;
 13 | 	std::cout << " - Instr. full type:  " << mipp::InstructionFullType              << std::endl;
 14 | 	std::cout << " - Instr. version:    " << mipp::InstructionVersion               << std::endl;
 15 | 	std::cout << " - Reg. size:         " << mipp::RegisterSizeBit       << " bits" << std::endl;
 16 | 	std::cout << " - Reg. lanes:        " << mipp::Lanes                            << std::endl;
 17 | 	std::cout << " - 64-bit support:    " << (mipp::Support64Bit    ? "yes" : "no") << std::endl;
 18 | 	std::cout << " - Byte/word support: " << (mipp::SupportByteWord ? "yes" : "no") << std::endl;
 19 | 	auto ext = mipp::InstructionExtensions();
 20 | 	if (ext.size() > 0)
 21 | 	{
 22 | 		std::cout << " - Instr. extensions: {";
 23 | 		for (auto i = 0; i < (int)ext.size(); i++)
 24 | 			std::cout << ext[i] << (i < ((int)ext.size() -1) ? ", " : "");
 25 | 		std::cout << "}" << std::endl;
 26 | 	}
 27 | 	std::cout << std::endl;
 28 | 	// ------------------------------------------------------------------------
 29 | 
 30 | 	std::random_device rd;
 31 | 	std::mt19937 g(rd()); // only needed by the shuffle that is commented out below
 32 | 
 33 | 	signed char t_1[mipp::N<signed char>()];
 34 | 	for (auto i = 0; i < mipp::N<signed char>(); i++) t_1[i] = i+1; // 1, 2, 3, ...
 35 | 	// std::shuffle(t_1, t_1 + mipp::N<signed char>(), g);
 36 | 
 37 | 	mipp::Reg<signed char> in_1; in_1.loadu(t_1);
 38 | 
 39 | 	std::cout << "---------------------------- int8" << std::endl << std::endl;
 40 | 
 41 | 	std::cout << "Input vectors (signed char): " << std::endl;
 42 | 	std::cout << "in_1  = " << in_1 << std::endl;
 43 | 	std::cout << std::endl;
 44 | 
 45 | 	std::cout << "--------------------------- int16" << std::endl << std::endl;
 46 | 
 47 | 	auto low  = in_1.low (); // the two halves of the int8 register
 48 | 	auto high = in_1.high();
 49 | 
 50 | 	auto low_short = low.template cvt<short>();
 51 | 	std::cout << "Output vectors (low.cvt<short>()): " << std::endl;
 52 | 	std::cout << "low  = " << low_short << std::endl;
 53 | 	std::cout << std::endl;
 54 | 
 55 | 	auto high_short = high.template cvt<short>();
 56 | 	std::cout << "Output vectors (high.cvt<short>()): " << std::endl;
 57 | 	std::cout << "high = " << high_short << std::endl;
 58 | 	std::cout << std::endl;
 59 | 
 60 | 	std::cout << "--------------------------- int32" << std::endl << std::endl;
 61 | 
 62 | 	auto low_low_short  = low_short.low(); // halves of the first int16 register
 63 | 	auto high_low_short = low_short.high();
 64 | 
 65 | 	auto low_low_int = low_low_short.template cvt<int>();
 66 | 	std::cout << "Output vectors (low_low_short.cvt<int>()): " << std::endl;
 67 | 	std::cout << "low  = " << low_low_int << std::endl;
 68 | 	std::cout << std::endl;
 69 | 
 70 | 	auto high_low_int = high_low_short.template cvt<int>();
 71 | 	std::cout << "Output vectors (high_low_short.cvt<int>()): " << std::endl;
 72 | 	std::cout << "high = " << high_low_int << std::endl;
 73 | 	std::cout << std::endl;
 74 | 
 75 | 	auto low_high_short  = high_short.low(); // halves of the second int16 register
 76 | 	auto high_high_short = high_short.high();
 77 | 
 78 | 	auto low_high_int = low_high_short.template cvt<int>();
 79 | 	std::cout << "Output vectors (low_high_short.cvt<int>()): " << std::endl;
 80 | 	std::cout << "low  = " << low_high_int << std::endl;
 81 | 	std::cout << std::endl;
 82 | 
 83 | 	auto high_high_int = high_high_short.template cvt<int>();
 84 | 	std::cout << "Output vectors (high_high_short.cvt<int>()): " << std::endl;
 85 | 	std::cout << "high = " << high_high_int << std::endl;
 86 | 	std::cout << std::endl;
 87 | 
 88 | 	std::cout << "------------------------- float32" << std::endl << std::endl;
 89 | 
 90 | 	auto low_low_float = low_low_int.template cvt<float>() + 0.1f;
 91 | 	std::cout << "Output vectors (low_low_int.cvt<float>() + 0.1f): " << std::endl;
 92 | 	std::cout << "p1   = " << low_low_float << std::endl;
 93 | 	std::cout << std::endl;
 94 | 
 95 | 	auto high_low_float = high_low_int.template cvt<float>() + 0.1f;
 96 | 	std::cout << "Output vectors (high_low_int.cvt<float>() + 0.1f): " << std::endl;
 97 | 	std::cout << "p2   = " << high_low_float << std::endl;
 98 | 	std::cout << std::endl;
 99 | 
100 | 	auto low_high_float = low_high_int.template cvt<float>() + 0.1f;
101 | 	std::cout << "Output vectors (low_high_int.cvt<float>() + 0.1f): " << std::endl;
102 | 	std::cout << "p3   = " << low_high_float << std::endl;
103 | 	std::cout << std::endl;
104 | 
105 | 	auto high_high_float = high_high_int.template cvt<float>() + 0.1f;
106 | 	std::cout << "Output vectors (high_high_int.cvt<float>() + 0.1f): " << std::endl;
107 | 	std::cout << "p4   = " << high_high_float << std::endl;
108 | 	std::cout << std::endl;
109 | 
110 | 	return 0;
111 | }
112 | 


--------------------------------------------------------------------------------
/examples/gemm.cpp:
--------------------------------------------------------------------------------
 1 | #ifndef BLOCKI
 2 | #define BLOCKI 64
 3 | #endif
 4 | #ifndef BLOCKJ
 5 | #define BLOCKJ 64
 6 | #endif
 7 | #ifndef BLOCKK
 8 | #define BLOCKK 256
 9 | #endif
10 | 
11 | #ifndef TYPE
12 | #define TYPE double
13 | #endif
14 | #include <iostream>
15 | #include <mipp.h>
16 | // GEMM kernel on one block: C[i][j] += sum over k of A[i][k] * B[k][j], with C being BLOCKI x BLOCKJ, A BLOCKI x BLOCKK and B BLOCKK x BLOCKJ (row-major); returns C[0].
17 | extern "C" {
18 | TYPE k_1x1x1(TYPE *vA, TYPE *vB, TYPE *vC); // return type matches the definition below for any TYPE, not only double
19 | }
20 | TYPE k_1x1x1(TYPE *vA, TYPE *vB, TYPE *vC) {
21 |     const int nv=mipp::N<TYPE>(); // number of TYPE elements per register
22 |     TYPE *B=&vB[0];
23 |     TYPE *C=&vC[0];
24 |     TYPE *A=&vA[0];
25 |     for (int i=0;i<BLOCKI;i+=1) {
26 |        for (int j=0;j<BLOCKJ;j+=1*nv) { // one register of C per iteration; assumes BLOCKJ is a multiple of nv
27 |           mipp::Reg<TYPE> c00;
28 |           c00.load(&C[(i+0)*BLOCKJ + j + (0)*nv]); // current values of C[i][j .. j+nv-1]
29 |           for (int k=0;k<BLOCKK;k+=1) {
30 |              mipp::Reg<TYPE> a00;
31 |              a00 = mipp::set1<TYPE>(A[(i+0)*BLOCKK+k+0]); // register built from the scalar A[i][k]
32 |              mipp::Reg<TYPE> b00;
33 |              b00.load(&B[(k+0)*BLOCKJ + j + (0)*nv]); // B[k][j .. j+nv-1]
34 |              c00 =mipp::fmadd(a00, b00,c00); // accumulate the a00/b00 contribution into c00
35 |           }
36 |           c00.store(&C[(i+0)*BLOCKJ + j + (0)*nv]); // write the accumulated values back to C
37 |        }
38 |     }
39 |     return C[0];
40 | }
41 | // Prints the MIPP configuration, then calls k_1x1x1 100 times on the same buffers and prints the sum of the returned values.
42 | int main(int argc, char** argv)
43 | {
44 |   // --------------------------------------------------------------------------
45 |   std::cout << "MIPP example" << std::endl;
46 |   std::cout << "------------" << std::endl;
47 |   std::cout << " - Instr. type:       " << mipp::InstructionType                  << std::endl;
48 |   std::cout << " - Instr. full type:  " << mipp::InstructionFullType              << std::endl;
49 |   std::cout << " - Instr. version:    " << mipp::InstructionVersion               << std::endl;
50 |   std::cout << " - Reg. size:         " << mipp::RegisterSizeBit       << " bits" << std::endl;
51 |   std::cout << " - Reg. lanes:        " << mipp::Lanes                            << std::endl;
52 |   std::cout << " - 64-bit support:    " << (mipp::Support64Bit    ? "yes" : "no") << std::endl;
53 |   std::cout << " - Byte/word support: " << (mipp::SupportByteWord ? "yes" : "no") << std::endl;
54 |   auto ext = mipp::InstructionExtensions();
55 |   if (ext.size() > 0)
56 |   {
57 |     std::cout << " - Instr. extensions: {";
58 |     for (auto i = 0; i < (int)ext.size(); i++)
59 |       std::cout << ext[i] << (i < ((int)ext.size() -1) ? ", " : "");
60 |     std::cout << "}" << std::endl;
61 |   }
62 |   std::cout << std::endl;
63 |   // --------------------------------------------------------------------------
64 | 
65 |   mipp::vector<TYPE> A(BLOCKI*BLOCKK, 1); // presumably size + fill value, like std::vector - TODO confirm
66 |   mipp::vector<TYPE> B(BLOCKK*BLOCKJ, 2);
67 |   mipp::vector<TYPE> C(BLOCKI*BLOCKJ, 3);
68 |   TYPE s=0;
69 |   for (int i=0;i<100;i++) // C is not reset between calls, so each call accumulates on top of the previous one
70 |     s+=k_1x1x1(&A[0],&B[0],&C[0]);
71 |   std::cout << s << std::endl;
72 |   return 0;
73 | }
74 | 
75 | 


--------------------------------------------------------------------------------
/examples/initreg.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <random>
 3 | #include <algorithm>
 4 | 
 5 | #include <mipp.h>
 6 | // Example: three ways of initialising a mipp::Reg (value list, single scalar, array), each printed afterwards.
 7 | int main(int argc, char** argv)
 8 | {
 9 | 	// ------------------------------------------------------------------------
10 | 	std::cout << "MIPP example" << std::endl;
11 | 	std::cout << "------------" << std::endl;
12 | 	std::cout << " - Instr. type:       " << mipp::InstructionType                  << std::endl;
13 | 	std::cout << " - Instr. full type:  " << mipp::InstructionFullType              << std::endl;
14 | 	std::cout << " - Instr. version:    " << mipp::InstructionVersion               << std::endl;
15 | 	std::cout << " - Reg. size:         " << mipp::RegisterSizeBit       << " bits" << std::endl;
16 | 	std::cout << " - Reg. lanes:        " << mipp::Lanes                            << std::endl;
17 | 	std::cout << " - 64-bit support:    " << (mipp::Support64Bit    ? "yes" : "no") << std::endl;
18 | 	std::cout << " - Byte/word support: " << (mipp::SupportByteWord ? "yes" : "no") << std::endl;
19 | 	auto ext = mipp::InstructionExtensions();
20 | 	if (ext.size() > 0)
21 | 	{
22 | 		std::cout << " - Instr. extensions: {";
23 | 		for (auto i = 0; i < (int)ext.size(); i++)
24 | 			std::cout << ext[i] << (i < ((int)ext.size() -1) ? ", " : "");
25 | 		std::cout << "}" << std::endl;
26 | 	}
27 | 	std::cout << std::endl;
28 | 	// ------------------------------------------------------------------------
29 | 
30 | 	std::random_device rd;
31 | 	std::mt19937 g(rd());
32 | 
33 | 	using type = float;
34 | 
35 | 	type t_1[mipp::N<type>()];
36 | 	for (auto i = 0; i < mipp::N<type>(); i++) t_1[i] = i; // 0, 1, 2, ...
37 | 	std::shuffle(t_1, t_1 + mipp::N<type>(), g); // random order on every run
38 | 
39 | 	mipp::Reg<type> in_1 = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}; // NOTE(review): 32 values are listed whatever mipp::N<type>() is - confirm how surplus values are handled
40 | 	mipp::Reg<type> in_2 = 12; // initialised from a single scalar
41 | 	mipp::Reg<type> in_3 = t_1; // initialised from the shuffled array
42 | 
43 | 	std::cout << "Input vectors: " << std::endl;
44 | 	std::cout << "in_1 = " << in_1 << std::endl;
45 | 	std::cout << "in_2 = " << in_2 << std::endl;
46 | 	std::cout << "in_3 = " << in_3 << std::endl;
47 | 
48 | 	return 0;
49 | }
50 | 


--------------------------------------------------------------------------------
/examples/mask.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <random>
  3 | #include <algorithm>
  4 | 
  5 | #include <mipp.h>
  6 | // Example: builds registers and masks, then prints the result of masked arithmetic, comparisons and mask logic/shift operators.
  7 | int main(int argc, char** argv)
  8 | {
  9 | 	// ------------------------------------------------------------------------
 10 | 	std::cout << "MIPP example" << std::endl;
 11 | 	std::cout << "------------" << std::endl;
 12 | 	std::cout << " - Instr. type:       " << mipp::InstructionType                  << std::endl;
 13 | 	std::cout << " - Instr. full type:  " << mipp::InstructionFullType              << std::endl;
 14 | 	std::cout << " - Instr. version:    " << mipp::InstructionVersion               << std::endl;
 15 | 	std::cout << " - Reg. size:         " << mipp::RegisterSizeBit       << " bits" << std::endl;
 16 | 	std::cout << " - Reg. lanes:        " << mipp::Lanes                            << std::endl;
 17 | 	std::cout << " - 64-bit support:    " << (mipp::Support64Bit    ? "yes" : "no") << std::endl;
 18 | 	std::cout << " - Byte/word support: " << (mipp::SupportByteWord ? "yes" : "no") << std::endl;
 19 | 	auto ext = mipp::InstructionExtensions();
 20 | 	if (ext.size() > 0)
 21 | 	{
 22 | 		std::cout << " - Instr. extensions: {";
 23 | 		for (auto i = 0; i < (int)ext.size(); i++)
 24 | 			std::cout << ext[i] << (i < ((int)ext.size() -1) ? ", " : "");
 25 | 		std::cout << "}" << std::endl;
 26 | 	}
 27 | 	std::cout << std::endl;
 28 | 	// ------------------------------------------------------------------------
 29 | 
 30 | 	std::random_device rd;
 31 | 	std::mt19937 g(rd());
 32 | 
 33 | 	using T = float;
 34 | 	constexpr int N = mipp::N<T>();
 35 | 
 36 | 	T t_1[N];
 37 | 	for (auto i = 0; i < N; i++) t_1[i] = i+1;
 38 | 	std::shuffle(t_1, t_1 + N, g);
 39 | 
 40 | 	T t_2[N];
 41 | 	for (auto i = 0; i < N; i++) t_2[i] = i+1;
 42 | 	std::shuffle(t_2, t_2 + N, g);
 43 | 
 44 | 	mipp::Reg<T> in_1 = t_1;
 45 | 	mipp::Reg<T> in_2 = t_2;
 46 | 	mipp::Reg<T> in_3 = (T)-1;
 47 | 	mipp::Msk<N> m_1  = false;
 48 | 	mipp::Msk<N> m_2  = true;
 49 | 	mipp::Msk<N> m_3  = {true, false,true, true, false,true ,true ,true ,
 50 | 	                     false,false,false,false,false,true ,false,true ,
 51 | 	                     false,true ,true ,true ,false,true ,false,true ,
 52 | 	                     true ,true ,false,true ,false,false,false,false};
 53 | 	mipp::Msk<N> m_4  = {false,true ,false,false,true ,false,false,false, // element-wise negation of the m_3 list
 54 | 	                     true ,true ,true ,true ,true ,false,true ,false,
 55 | 	                     true ,false,false,false,true ,false,true ,false,
 56 | 	                     false,false,true ,false,true ,true ,true ,true };
 57 | 
 58 | 	std::cout << "Input vectors: " << std::endl;
 59 | 	std::cout << "in_1 = " << in_1 << std::endl;
 60 | 	std::cout << "in_2 = " << in_2 << std::endl;
 61 | 	std::cout << "in_3 = " << in_3 << std::endl;
 62 | 	std::cout << "m_1  = " << m_1  << std::endl;
 63 | 	std::cout << "m_2  = " << m_2  << std::endl;
 64 | 	std::cout << "m_3  = " << m_3  << std::endl;
 65 | 	std::cout << "m_4  = " << m_4  << std::endl;
 66 | 	std::cout << std::endl;
 67 | 
 68 |     std::cout << "r_1 = m_1.toReg<T>(): " << std::endl;
 69 |     mipp::Reg<T> r_1 = m_1.toReg<T>();
 70 |     std::cout << "r_1 = " << r_1 << std::endl;
 71 |     std::cout << std::endl;
 72 | 
 73 |     std::cout << "r_2 = m_2.toReg<T>(): " << std::endl;
 74 |     mipp::Reg<T> r_2 = m_2.toReg<T>();
 75 |     std::cout << "r_2 = " << r_2 << std::endl;
 76 |     std::cout << std::endl;
 77 | 
 78 | 	std::cout << "Output vector (mout = m_3; mout.set0()): " << std::endl;
 79 | 	auto mout = m_3;
 80 | 	mout.set0();
 81 | 	std::cout << "mout = " << mout << std::endl;
 82 | 	std::cout << std::endl;
 83 | 
 84 | 	std::cout << "Output vector (m_3 & (in_1 - in_2)): " << std::endl;
 85 | 	auto out = mipp::maskz<T,mipp::sub>(m_3, in_1, in_2);
 86 | 	std::cout << "out  = " << out << std::endl;
 87 | 	std::cout << std::endl;
 88 | 
 89 | 	std::cout << "Output mask (in_1 > in_2): " << std::endl;
 90 | 	auto m_5 = in_1 > in_2;
 91 | 	std::cout << "m_5  = " << m_5 << std::endl;
 92 | 	std::cout << std::endl;
 93 | 
 94 | 	std::cout << "Output vector ((m_5 & (in_1 + in_2)) | (~m_5 & in_3)): " << std::endl;
 95 | 	out = mipp::mask<T,mipp::add>(m_5, in_3, in_1, in_2);
 96 | 	std::cout << "out  = " << out << std::endl;
 97 | 	std::cout << std::endl;
 98 | 
 99 | 	std::cout << "Output vector (m_3 | m_4): " << std::endl; // label now names the operator actually applied
100 | 	mout = m_3 | m_4;
101 | 	std::cout << "mout = " << mout << std::endl;
102 | 	std::cout << std::endl;
103 | 
104 | 	std::cout << "Output vector (m_3 ^ m_4): " << std::endl; // label now names the operator actually applied
105 | 	mout = m_3 ^ m_4;
106 | 	std::cout << "mout = " << mout << std::endl;
107 | 	std::cout << std::endl;
108 | 
109 | 	std::cout << "Output vector (m_3 & m_4): " << std::endl;
110 | 	mout = m_3 & m_4;
111 | 	std::cout << "mout = " << mout << std::endl;
112 | 	std::cout << std::endl;
113 | 
114 | 	std::cout << "Output vector (~mout): " << std::endl;
115 | 	mout = ~mout;
116 | 	std::cout << "mout = " << mout << std::endl;
117 | 	std::cout << std::endl;
118 | 
119 | 	std::cout << "Output vector (in_1 - in_2): " << std::endl;
120 | 	auto out2 = in_1 - in_2;
121 | 	std::cout << "out2 = " << out2 << std::endl;
122 | 	std::cout << std::endl;
123 | 
124 | 	std::cout << "Output vector (mipp::sign(out2)): " << std::endl;
125 | 	mout = mipp::sign(out2);
126 | 	std::cout << "mout = " << mout << std::endl;
127 | 	std::cout << std::endl;
128 | 
129 | 	std::cout << "Output vector (mipp::neg(out2, mout)): " << std::endl;
130 | 	out2 = mipp::neg(out2, mout);
131 | 	std::cout << "out2 = " << out2 << std::endl;
132 | 	std::cout << std::endl;
133 | 
134 | #if !defined(MIPP_NEON)
135 | 	std::cout << "Output vector (m_2 << 0): " << std::endl;
136 | 	mout = m_2 << 0;
137 | 	std::cout << "mout = " << mout << std::endl;
138 | 	std::cout << std::endl;
139 | 
140 | 	std::cout << "Output vector (m_2 << 1): " << std::endl;
141 | 	mout = m_2 << 1;
142 | 	std::cout << "mout = " << mout << std::endl;
143 | 	std::cout << std::endl;
144 | 
145 | 	std::cout << "Output vector (m_2 >> 2): " << std::endl;
146 | 	mout = m_2 >> 2;
147 | 	std::cout << "mout = " << mout << std::endl;
148 | 	std::cout << std::endl;
149 | #endif
150 | 
151 | 	return 0;
152 | }
153 | 


--------------------------------------------------------------------------------
/examples/mathfun.cpp:
--------------------------------------------------------------------------------
 1 | #include <iostream>
 2 | #include <random>
 3 | #include <algorithm>
 4 | 
 5 | #include <mipp.h>
 6 | // Example: applies log, exp, sin, cos and sincos to float registers and prints every result.
 7 | int main(int argc, char** argv)
 8 | {
 9 | 	// ------------------------------------------------------------------------
10 | 	std::cout << "MIPP example" << std::endl;
11 | 	std::cout << "------------" << std::endl;
12 | 	std::cout << " - Instr. type:       " << mipp::InstructionType                  << std::endl;
13 | 	std::cout << " - Instr. full type:  " << mipp::InstructionFullType              << std::endl;
14 | 	std::cout << " - Instr. version:    " << mipp::InstructionVersion               << std::endl;
15 | 	std::cout << " - Reg. size:         " << mipp::RegisterSizeBit       << " bits" << std::endl;
16 | 	std::cout << " - Reg. lanes:        " << mipp::Lanes                            << std::endl;
17 | 	std::cout << " - 64-bit support:    " << (mipp::Support64Bit    ? "yes" : "no") << std::endl;
18 | 	std::cout << " - Byte/word support: " << (mipp::SupportByteWord ? "yes" : "no") << std::endl;
19 | 	auto ext = mipp::InstructionExtensions();
20 | 	if (ext.size() > 0)
21 | 	{
22 | 		std::cout << " - Instr. extensions: {";
23 | 		for (auto i = 0; i < (int)ext.size(); i++)
24 | 			std::cout << ext[i] << (i < ((int)ext.size() -1) ? ", " : "");
25 | 		std::cout << "}" << std::endl;
26 | 	}
27 | 	std::cout << std::endl;
28 | 	// ------------------------------------------------------------------------
29 | 
30 | 	std::random_device rd;
31 | 	std::mt19937 g(rd());
32 | 
33 | 	using type = float;
34 | 
35 | 	type t_1[mipp::N<type>()];
36 | 	for (auto i = 0; i < mipp::N<type>(); i++) t_1[i] = 1; // every element is 1
37 | 	std::shuffle(t_1, t_1 + mipp::N<type>(), g); // all elements are equal, so the order change is not observable
38 | 
39 | 	type t_2[mipp::N<type>()];
40 | 	for (auto i = 0; i < mipp::N<type>(); i++) t_2[i] = i+1; // 1, 2, 3, ...
41 | 	std::shuffle(t_2, t_2 + mipp::N<type>(), g);
42 | 
43 | 	type t_3[mipp::N<type>()];
44 | 	for (auto i = 0; i < mipp::N<type>(); i++) t_3[i] = 3.14; // every element is 3.14 (stored as float)
45 | 	std::shuffle(t_3, t_3 + mipp::N<type>(), g);
46 | 
47 | 	mipp::Reg<type> in_1; in_1.loadu(t_1);
48 | 	mipp::Reg<type> in_2; in_2.loadu(t_2);
49 | 	mipp::Reg<type> in_3; in_3.loadu(t_3);
50 | 
51 | 	std::cout << "Input vectors: " << std::endl; // NOTE(review): in_3 is used below but is not printed here
52 | 	std::cout << "in_1 = " << in_1 << std::endl;
53 | 	std::cout << "in_2 = " << in_2 << std::endl;
54 | 	std::cout << std::endl;
55 | 
56 | 	auto out = in_1.log();
57 | 	std::cout << "Output vectors (in_1.log()): " << std::endl;
58 | 	std::cout << "out  = " << out << std::endl;
59 | 	std::cout << std::endl;
60 | 
61 | 	out = mipp::exp(in_1);
62 | 	std::cout << "Output vectors (mipp::exp(in_1)): " << std::endl;
63 | 	std::cout << "out  = " << out << std::endl;
64 | 	std::cout << std::endl;
65 | 
66 | 	out = in_2.exp();
67 | 	std::cout << "Output vectors (in_2.exp()): " << std::endl;
68 | 	std::cout << "out  = " << out << std::endl;
69 | 	std::cout << std::endl;
70 | 
71 | 	out = in_3.sin();
72 | 	std::cout << "Output vectors (in_3.sin()): " << std::endl;
73 | 	std::cout << "out  = " << out << std::endl;
74 | 	std::cout << std::endl;
75 | 
76 | 	out = in_3.cos();
77 | 	std::cout << "Output vectors (in_3.cos()): " << std::endl;
78 | 	std::cout << "out  = " << out << std::endl;
79 | 	std::cout << std::endl;
80 | 
81 | 	mipp::Reg<type> rsin, rcos;
82 | 	in_3.sincos(rsin, rcos); // fills both output registers in one call
83 | 	std::cout << "Output vectors (in_3.sincos(rsin, rcos)): " << std::endl;
84 | 	std::cout << "rsin  = " << rsin << std::endl;
85 | 	std::cout << "rcos  = " << rcos << std::endl;
86 | 	std::cout << std::endl;
87 | 
88 | 	return 0;
89 | }
90 | 


--------------------------------------------------------------------------------
/examples/operator.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <random>
  3 | #include <algorithm>
  4 | 
  5 | #include <mipp.h>
  6 | 
  7 | int main(int argc, char** argv) // demo of the mipp::Reg operators on two float registers; argc/argv are not read
  8 | {
  9 | 	// ------------------------------------------------------------------------
 10 | 	std::cout << "MIPP example" << std::endl;
 11 | 	std::cout << "------------" << std::endl;
 12 | 	std::cout << " - Instr. type:       " << mipp::InstructionType                  << std::endl;
 13 | 	std::cout << " - Instr. full type:  " << mipp::InstructionFullType              << std::endl;
 14 | 	std::cout << " - Instr. version:    " << mipp::InstructionVersion               << std::endl;
 15 | 	std::cout << " - Reg. size:         " << mipp::RegisterSizeBit       << " bits" << std::endl;
 16 | 	std::cout << " - Reg. lanes:        " << mipp::Lanes                            << std::endl;
 17 | 	std::cout << " - 64-bit support:    " << (mipp::Support64Bit    ? "yes" : "no") << std::endl;
 18 | 	std::cout << " - Byte/word support: " << (mipp::SupportByteWord ? "yes" : "no") << std::endl;
 19 | 	auto ext = mipp::InstructionExtensions(); // indexable container, printed comma-separated below
 20 | 	if (ext.size() > 0)
 21 | 	{
 22 | 		std::cout << " - Instr. extensions: {";
 23 | 		for (auto i = 0; i < (int)ext.size(); i++)
 24 | 			std::cout << ext[i] << (i < ((int)ext.size() -1) ? ", " : ""); // no separator after the last entry
 25 | 		std::cout << "}" << std::endl;
 26 | 	}
 27 | 	std::cout << std::endl;
 28 | 	// ------------------------------------------------------------------------
 29 | 	// Inputs: two arrays of mipp::N<type>() elements, shuffled with a randomly seeded generator.
 30 | 	std::random_device rd;
 31 | 	std::mt19937 g(rd()); // seeded from std::random_device, so the shuffles differ between runs
 32 | 
 33 | 	using type = float; // element type of every array and register below
 34 | 	// t_1 and t_2 are both filled with 1..N and then shuffled independently.
 35 | 	type t_1[mipp::N<type>()];
 36 | 	for (auto i = 0; i < mipp::N<type>(); i++) t_1[i] = i+1;
 37 | 	std::shuffle(t_1, t_1 + mipp::N<type>(), g);
 38 | 
 39 | 	type t_2[mipp::N<type>()];
 40 | 	for (auto i = 0; i < mipp::N<type>(); i++) t_2[i] = i+1;
 41 | 	std::shuffle(t_2, t_2 + mipp::N<type>(), g);
 42 | 
 43 | 	mipp::Reg<type> in_1(t_1); // registers built directly from the raw arrays
 44 | 	mipp::Reg<type> in_2(t_2);
 45 | 
 46 | 	std::cout << "Input vectors: " << std::endl;
 47 | 	std::cout << "in_1 = " << in_1 << std::endl;
 48 | 	std::cout << "in_2 = " << in_2 << std::endl;
 49 | 	std::cout << std::endl;
 50 | 
 51 | 	auto out = in_1 + in_2; // 'out' is reused for every register-valued result below
 52 | 	// auto out = in_1.add(in_2);
 53 | 	std::cout << "Output vectors (in_1 + in_2): " << std::endl;
 54 | 	std::cout << "out  = " << out << std::endl;
 55 | 	std::cout << std::endl;
 56 | 
 57 | 	out = in_1 - in_2;
 58 | 	std::cout << "Output vectors (in_1 - in_2): " << std::endl;
 59 | 	std::cout << "out  = " << out << std::endl;
 60 | 	std::cout << std::endl;
 61 | 
 62 | #ifndef MIPP_NO_INTRINSICS // bitwise and shift demos are compiled only when MIPP_NO_INTRINSICS is not defined
 63 | 	if (typeid(type) == typeid(int) || typeid(type) == typeid(short) || typeid(type) == typeid(signed char)) // false for type = float: branch is skipped at run time
 64 | 	{
 65 | 		out = in_1;
 66 | 		out = ~out;
 67 | 		std::cout << "Output vectors (~in_1): " << std::endl;
 68 | 		std::cout << "out  = " << out << std::endl;
 69 | 		std::cout << std::endl;
 70 | 
 71 | 		out = in_1 | in_2;
 72 | 		std::cout << "Output vectors (in_1 | in_2): " << std::endl;
 73 | 		std::cout << "out  = " << out << std::endl;
 74 | 		std::cout << std::endl;
 75 | 
 76 | 		out = in_1 << 1;
 77 | 		std::cout << "Output vectors (in_1 << 1): " << std::endl;
 78 | 		std::cout << "out  = " << out << std::endl;
 79 | 		std::cout << std::endl;
 80 | 
 81 | 		out = in_1 >> 1;
 82 | 		std::cout << "Output vectors (in_1 >> 1): " << std::endl;
 83 | 		std::cout << "out  = " << out << std::endl;
 84 | 		std::cout << std::endl;
 85 | 	}
 86 | #endif // MIPP_NO_INTRINSICS
 87 | 
 88 | 	auto msk = (in_1 == in_2); // comparisons yield a mask object, reused for every comparison below
 89 | 	std::cout << "Output vectors (in_1 == in_2): " << std::endl;
 90 | 	std::cout << "msk  = " << msk << std::endl;
 91 | 	std::cout << std::endl;
 92 | 
 93 | 	msk = (in_1 > in_2);
 94 | 	std::cout << "Output vectors (in_1 > in_2): " << std::endl;
 95 | 	std::cout << "msk  = " << msk << std::endl;
 96 | 	std::cout << std::endl;
 97 | 
 98 | 	if (typeid(type) == typeid(double) || typeid(type) == typeid(float)) // true for type = float: this branch runs
 99 | 	{
100 | 		msk = (in_1 != in_2);
101 | 		std::cout << "Output vectors (in_1 != in_2): " << std::endl;
102 | 		std::cout << "msk  = " << msk << std::endl;
103 | 		std::cout << std::endl;
104 | 
105 | 		msk = (in_1 < in_2);
106 | 		std::cout << "Output vectors (in_1 < in_2): " << std::endl;
107 | 		std::cout << "msk  = " << msk << std::endl;
108 | 		std::cout << std::endl;
109 | 
110 | 		msk = (in_1 <= in_2);
111 | 		std::cout << "Output vectors (in_1 <= in_2): " << std::endl;
112 | 		std::cout << "msk  = " << msk << std::endl;
113 | 		std::cout << std::endl;
114 | 
115 | 		msk = (in_1 >= in_2);
116 | 		std::cout << "Output vectors (in_1 >= in_2): " << std::endl;
117 | 		std::cout << "msk  = " << msk << std::endl;
118 | 		std::cout << std::endl;
119 | 
120 | 		out = in_1 * in_2;
121 | 		std::cout << "Output vectors (in_1 * in_2): " << std::endl;
122 | 		std::cout << "out  = " << out << std::endl;
123 | 		std::cout << std::endl;
124 | 
125 | 		out = in_1 / in_2;
126 | 		std::cout << "Output vectors (in_1 / in_2): " << std::endl;
127 | 		std::cout << "out  = " << out << std::endl;
128 | 		std::cout << std::endl;
129 | 
130 | 		auto two = mipp::Reg<type>(2); // register constructed from the scalar 2
131 | 		out = in_1 + in_2 * two;
132 | 		std::cout << "Output vectors (in_1 + in_2 * two): " << std::endl;
133 | 		std::cout << "out  = " << out << std::endl;
134 | 		std::cout << std::endl;
135 | 	}
136 | 
137 | 	out = in_1.hmin(); // NOTE(review): presumably the horizontal minimum of in_1, assigned back to a register -- confirm in mipp.h
138 | 	std::cout << "Output vectors (in_1.hmin()): " << std::endl;
139 | 	std::cout << "out  = " << out << std::endl;
140 | 	std::cout << std::endl;
141 | 
142 | 	out = in_1 * 3 + 2; // register combined directly with scalar literals
143 | 	std::cout << "Output vectors (in_1 * 3 + 2): " << std::endl;
144 | 	std::cout << "out  = " << out << std::endl;
145 | 	std::cout << std::endl;
146 | 
147 | 	return 0;
148 | }
149 | 


--------------------------------------------------------------------------------
/examples/sort/Quick_sorter.hpp:
--------------------------------------------------------------------------------
 1 | #ifndef QUICK_SORTER_HPP
 2 | #define QUICK_SORTER_HPP
 3 | 
 4 | #include <cmath>
 5 | #include <vector>
 6 | #include <algorithm>
 7 | #include <cassert>
 8 | 
 9 | #include <mipp.h>
10 | 
11 | template <typename T> // T: key type; needs operator< and a std::numeric_limits<T> specialisation
12 | class Quick_sorter // partial quicksort: selects the indices of the smallest keys of a fixed-size array
13 | {
14 | private:
15 | 	const int         size; // number of keys read by sort()
16 | 	mipp::vector<int> R;    // index table into K (size +2 entries), permuted in place by sort()
17 | 	mipp::vector<T>   K;    // keys in K[1..size], sentinels in K[0] and K[size +1]
18 | public:
19 | 	Quick_sorter(const int size) : size(size), R(size +2), K(size + 2) // size: number of keys per sort() call
20 | 	{
21 | 		std::iota(R.begin() +1 , R.begin() + this->size +1 , 0);
22 | 		// sentinels: lowest() rather than min(), because min() is the smallest *positive* value for floating-point T
23 | 		K[0] = std::numeric_limits<T>::lowest();
24 | 		K[size +1] = std::numeric_limits<T>::max();
25 | 	}
26 | 	// Writes into pos[0..M-1] the 0-based indices (into values) of the M smallest of the 'size' keys; M = p_sort if p_sort > 0, else pos.size().
27 | 	void sort(const T* values, std::vector<int> &pos, const int p_sort = -1)
28 | 	{
29 | 		const auto M = (p_sort <= 0) ? (int)pos.size() : p_sort;
30 | 		std::copy(values, values + size, K.begin() +1);
31 | 		std::iota(R.begin(), R.begin() + this->size +2, 0); // R[p] = p: identity permutation, sentinels included
32 | 		assert(M <= size && M <= (int)pos.size()); // otherwise the loop below cannot reach j == M +1, or pos would be overrun
33 | 		// The loop stops as soon as a pivot settles at position M +1, so R[1..M] is selected but not further ordered.
34 | 		// Q1
35 | 		auto l = 1;    // current partition is R[l..r]
36 | 		auto r = size;
37 | 		auto j = r +1;
38 | 		do
39 | 		{
40 | 			auto i = l;
41 | 			j = r +1;
42 | 			auto KK = K[R[l]]; // pivot key: first entry of the partition
43 | 
44 | 			while (j > i)
45 | 			{
46 | 				do
47 | 				{
48 | 					i++;
49 | 				}
50 | 				while(K[R[i]] < KK); // scan right; stops at the latest on the max() sentinel
51 | 
52 | 				do
53 | 				{
54 | 					j--;
55 | 				}
56 | 				while(KK < K[R[j]]); // scan left; stops at the latest on the pivot itself (j == l)
57 | 
58 | 				if(j > i)
59 | 				{
60 | 					const auto tmp = R[j]; // scans have not crossed: exchange the misplaced pair
61 | 					R[j] = R[i];
62 | 					R[i] = tmp;
63 | 				}
64 | 				else
65 | 				{
66 | 					const auto tmp = R[l]; // scans crossed: move the pivot to its final position j
67 | 					R[l] = R[j];
68 | 					R[j] = tmp;
69 | 
70 | 					if(j < M +1)
71 | 						l = j +1; // pivot is left of the target: continue in the right part
72 | 					else
73 | 						r = j -1; // pivot is at or right of the target: continue in the left part
74 | 				}
75 | 			}
76 | 		} while (j != M +1);
77 | 		for (auto i = 0; i < M; i++)
78 | 			pos[i] = R[i +1] -1; // R holds 1-based positions in K; convert to 0-based indices in values
79 | 	}
80 | };
81 | 
82 | #endif /* QUICK_SORTER_HPP */
83 | 


--------------------------------------------------------------------------------
/examples/sorting.cpp:
--------------------------------------------------------------------------------
  1 | #include <iostream>
  2 | #include <random>
  3 | #include <algorithm>
  4 | #include <chrono>
  5 | #include <numeric>
  6 | 
  7 | #include <mipp.h>
  8 | 
  9 | #include "sort/LC_sorter.hpp"
 10 | #include "sort/LC_sorter_simd.hpp"
 11 | #include "sort/Quick_sorter.hpp"
 12 | 
 13 | using type = float;
 14 | 
 15 | int main(int argc, char** argv) // usage: sorting [n_elmts [k [n_tests]]]; times three partial sorters on the same shuffled inputs
 16 | {
 17 | 	// ------------------------------------------------------------------------
 18 | 	std::cout << "MIPP example" << std::endl;
 19 | 	std::cout << "------------" << std::endl;
 20 | 	std::cout << " - Instr. type:       " << mipp::InstructionType                  << std::endl;
 21 | 	std::cout << " - Instr. full type:  " << mipp::InstructionFullType              << std::endl;
 22 | 	std::cout << " - Instr. version:    " << mipp::InstructionVersion               << std::endl;
 23 | 	std::cout << " - Reg. size:         " << mipp::RegisterSizeBit       << " bits" << std::endl;
 24 | 	std::cout << " - Reg. lanes:        " << mipp::Lanes                            << std::endl;
 25 | 	std::cout << " - 64-bit support:    " << (mipp::Support64Bit    ? "yes" : "no") << std::endl;
 26 | 	std::cout << " - Byte/word support: " << (mipp::SupportByteWord ? "yes" : "no") << std::endl;
 27 | 	auto ext = mipp::InstructionExtensions();
 28 | 	if (ext.size() > 0)
 29 | 	{
 30 | 		std::cout << " - Instr. extensions: {";
 31 | 		for (auto i = 0; i < (int)ext.size(); i++)
 32 | 			std::cout << ext[i] << (i < ((int)ext.size() -1) ? ", " : "");
 33 | 		std::cout << "}" << std::endl;
 34 | 	}
 35 | 	std::cout << std::endl;
 36 | 	// ------------------------------------------------------------------------
 37 | 	// n_elmts: keys per test vector, k: number of smallest keys to locate, n_tests: number of test vectors
 38 | 	const auto n_elmts = (argc > 1) ? std::atoi(argv[1]) : 512;
 39 | 	const auto k       = (argc > 2) ? std::atoi(argv[2]) : 2;
 40 | 	const auto n_tests = (argc > 3) ? std::atoi(argv[3]) : 1000;
 41 | 	if (n_elmts < 1 || k < 1 || k > n_elmts || n_tests < 1) { std::cerr << "error: expected n_elmts >= 1, 1 <= k <= n_elmts and n_tests >= 1" << std::endl; return 1; } // pos[k -1] and values[n_tests -1] are indexed below
 42 | 	std::cout << "n_elmts = " << n_elmts << std::endl;
 43 | 	std::cout << "k       = " << k       << std::endl;
 44 | 	std::cout << "n_tests = " << n_tests << std::endl << std::endl;
 45 | 
 46 | 	std::random_device rd;
 47 | 	std::mt19937 g(rd());
 48 | 	g.seed(123); // fixed seed overrides the random one: the generated inputs are identical on every run
 49 | 
 50 | 	std::vector<std::vector<type>> values(n_tests, std::vector<type>(n_elmts)); // each row: 0..n_elmts-1 in shuffled order
 51 | 	for (auto i = 0; i < n_tests; i++)
 52 | 	{
 53 | 		std::iota   (values[i].begin(), values[i].end(), 0);
 54 | 		std::shuffle(values[i].begin(), values[i].end(), g);
 55 | 	}
 56 | 
 57 | 	// -------------------------------------------------------------------------
 58 | 
 59 | 	std::cout << "values: ";
 60 | 	for (auto i = 0; i < n_elmts; i++)
 61 | 		std::cout << values[n_tests -1][i] << ","; // only the last test vector is displayed
 62 | 	std::cout << std::endl;
 63 | 
 64 | 	std::cout << std::endl << "Lewis Carroll:" << std::endl;
 65 | 	std::vector<int> pos(k, -1); // output positions, shared by the three sorters
 66 | 	LC_sorter<type> lc(n_elmts);
 67 | 	std::fill(pos.begin(), pos.end(), -1);
 68 | 	auto t_before = std::chrono::steady_clock::now();
 69 | 	auto csum = 0; // accumulates pos[k -1] over all tests; printed so the sorters' results can be compared
 70 | 	for (auto i = 0; i < n_tests; i++)
 71 | 	{
 72 | 		lc.partial_sort(values[i].data(), pos, n_elmts);
 73 | 		csum += pos[k -1];
 74 | 	}
 75 | 	auto t_after = std::chrono::steady_clock::now();
 76 | 	auto d_delta = t_after - t_before;
 77 | 	auto decod_time_ms = std::chrono::duration<float, std::milli>(d_delta).count(); // period-independent conversion to milliseconds
 78 | 
 79 | 	std::cout << "min pos: "; for (auto i = 0; i < k; i++) std::cout <<                    pos[i]  << ","; std::cout << std::endl;
 80 | 	std::cout << "min: ";     for (auto i = 0; i < k; i++) std::cout << values[n_tests -1][pos[i]] << ","; std::cout << std::endl;
 81 | 	std::cout << "time: " << decod_time_ms << "ms" << std::endl;
 82 | 	std::cout << "csum: " << csum << std::endl;
 83 | 
 84 | 	std::cout << std::endl << "Lewis Carroll SIMD:" << std::endl;
 85 | 	LC_sorter_simd<type> lc_simd(n_elmts);
 86 | 	std::fill(pos.begin(), pos.end(), -1);
 87 | 	t_before = std::chrono::steady_clock::now();
 88 | 	csum = 0;
 89 | 	for (auto i = 0; i < n_tests; i++)
 90 | 	{
 91 | 		lc_simd.partial_sort(values[i].data(), pos);
 92 | 		csum += pos[k -1];
 93 | 	}
 94 | 	t_after = std::chrono::steady_clock::now();
 95 | 	d_delta = t_after - t_before;
 96 | 	decod_time_ms = std::chrono::duration<float, std::milli>(d_delta).count();
 97 | 
 98 | 	std::cout << "min pos: "; for (auto i = 0; i < k; i++) std::cout <<                    pos[i]  << ","; std::cout << std::endl;
 99 | 	std::cout << "min: ";     for (auto i = 0; i < k; i++) std::cout << values[n_tests -1][pos[i]] << ","; std::cout << std::endl;
100 | 	std::cout << "time: " << decod_time_ms << "ms" << std::endl;
101 | 	std::cout << "csum: " << csum << std::endl;
102 | 
103 | 	std::cout << std::endl << "Partial Quicksort:" << std::endl;
104 | 	Quick_sorter<type> qs(n_elmts);
105 | 	std::fill(pos.begin(), pos.end(), -1);
106 | 	t_before = std::chrono::steady_clock::now();
107 | 	csum = 0;
108 | 	for (auto i = 0; i < n_tests; i++)
109 | 	{
110 | 		qs.sort(values[i].data(), pos);
111 | 		csum += pos[k -1];
112 | 	}
113 | 	t_after = std::chrono::steady_clock::now();
114 | 	d_delta = t_after - t_before;
115 | 	decod_time_ms = std::chrono::duration<float, std::milli>(d_delta).count();
116 | 
117 | 	std::cout << "min pos: "; for (auto i = 0; i < k; i++) std::cout <<                    pos[i]  << ","; std::cout << std::endl;
118 | 	std::cout << "min: ";     for (auto i = 0; i < k; i++) std::cout << values[n_tests -1][pos[i]] << ","; std::cout << std::endl;
119 | 	std::cout << "time: " << decod_time_ms << "ms" << std::endl;
120 | 	std::cout << "csum: " << csum << std::endl;
121 | 
122 | 	// std::cout << std::endl << "C++11 partial sort:" << std::endl;
123 | 	// mipp::vector<int> indices(n_elmts);
124 | 	// std::iota(indices.begin(), indices.end(), 0);
125 | 	// t_before = std::chrono::steady_clock::now();
126 | 	// csum = 0;
127 | 	// for (auto i = 0; i < n_tests; i++)
128 | 	// {
129 | 	// 	std::partial_sort(indices.begin(), indices.begin() +k, indices.end(),
130 | 	// 	[values, i](int x, int y) {
131 | 	// 		return values[i][x] < values[i][y];
132 | 	// 	});
133 | 	// 	csum += indices[k -1];
134 | 	// }
135 | 	
136 | 	// t_after = std::chrono::steady_clock::now();
137 | 	// d_delta = t_after - t_before;
138 | 	// decod_time_ms = (float)d_delta.count() * 0.000001f;
139 | 
140 | 	// std::cout << "min pos: "; for (auto i = 0; i < k; i++) std::cout << indices[i]                     << ","; std::cout << std::endl;
141 | 	// std::cout << "min: ";     for (auto i = 0; i < k; i++) std::cout << values[n_tests -1][indices[i]] << ","; std::cout << std::endl;
142 | 	// std::cout << "time: " << decod_time_ms << "ms" << std::endl;
143 | 	// std::cout << "csum: " << csum << std::endl;
144 | 
145 | 	return 0;
146 | }
147 | 


--------------------------------------------------------------------------------
/include/math/avx512_mathfun.h:
--------------------------------------------------------------------------------
 1 | /* 
 2 |    AVX512 implementation of sin, cos, sincos, exp and log
 3 | 
 4 |    Based on "sse_mathfun.h", by Julien Pommier
 5 |    http://gruntthepeon.free.fr/ssemath/
 6 | 
 7 |    Copyright (C) 2017 Adrien Cassagne
 8 |    MIT license
 9 | */
10 | #ifdef __AVX512F__ // the whole header is compiled out unless __AVX512F__ is defined
11 | #ifndef AVX512_MATHFUN_H_
12 | #define AVX512_MATHFUN_H_
13 | 
14 | #include <immintrin.h>
15 | 
16 | typedef __m512 v16sf; // vector of 16 float (avx512)
17 | 
18 | // prototypes (the included "avx512_mathfun.hxx" below presumably holds the definitions)
19 | inline v16sf log512_ps(v16sf x);
20 | inline v16sf exp512_ps(v16sf x);
21 | inline v16sf sin512_ps(v16sf x);
22 | inline v16sf cos512_ps(v16sf x);
23 | inline void sincos512_ps(v16sf x, v16sf *s, v16sf *c); // presumably stores sin(x) in *s and cos(x) in *c -- confirm in the .hxx
24 | 
25 | #include "avx512_mathfun.hxx"
26 | 
27 | #endif // AVX512_MATHFUN_H_
28 | #endif // __AVX512F__
29 | 


--------------------------------------------------------------------------------
/include/math/avx_mathfun.h:
--------------------------------------------------------------------------------
 1 | /* 
 2 |    AVX implementation of sin, cos, sincos, exp and log
 3 | 
 4 |    Based on "sse_mathfun.h", by Julien Pommier
 5 |    http://gruntthepeon.free.fr/ssemath/
 6 | 
 7 |    Copyright (C) 2012 Giovanni Garberoglio
 8 |    Interdisciplinary Laboratory for Computational Science (LISC)
 9 |    Fondazione Bruno Kessler and University of Trento
10 |    via Sommarive, 18
11 |    I-38123 Trento (Italy)
12 | 
13 |   This software is provided 'as-is', without any express or implied
14 |   warranty.  In no event will the authors be held liable for any damages
15 |   arising from the use of this software.
16 | 
17 |   Permission is granted to anyone to use this software for any purpose,
18 |   including commercial applications, and to alter it and redistribute it
19 |   freely, subject to the following restrictions:
20 | 
21 |   1. The origin of this software must not be misrepresented; you must not
22 |      claim that you wrote the original software. If you use this software
23 |      in a product, an acknowledgment in the product documentation would be
24 |      appreciated but is not required.
25 |   2. Altered source versions must be plainly marked as such, and must not be
26 |      misrepresented as being the original software.
27 |   3. This notice may not be removed or altered from any source distribution.
28 | 
29 |   (this is the zlib license)
30 | */
31 | #ifdef __AVX__ // the whole header is compiled out unless __AVX__ is defined
32 | #ifndef AVX_MATHFUN_H_
33 | #define AVX_MATHFUN_H_
34 | 
35 | #include <immintrin.h>
36 | 
37 | typedef __m256 v8sf; // vector of 8 float (avx)
38 | 
39 | // prototypes (the included "avx_mathfun.hxx" below presumably holds the definitions)
40 | inline v8sf log256_ps(v8sf x);
41 | inline v8sf exp256_ps(v8sf x);
42 | inline v8sf sin256_ps(v8sf x);
43 | inline v8sf cos256_ps(v8sf x);
44 | inline void sincos256_ps(v8sf x, v8sf *s, v8sf *c); // presumably stores sin(x) in *s and cos(x) in *c -- confirm in the .hxx
45 | 
46 | #include "avx_mathfun.hxx"
47 | 
48 | #endif // AVX_MATHFUN_H_
49 | #endif // __AVX__


--------------------------------------------------------------------------------
/include/math/neon_mathfun.h:
--------------------------------------------------------------------------------
 1 | /* NEON implementation of sin, cos, exp and log
 2 | 
 3 |    Inspired by Intel Approximate Math library, and based on the
 4 |    corresponding algorithms of the cephes math library
 5 | */
 6 | 
 7 | /* Copyright (C) 2011  Julien Pommier
 8 | 
 9 |   This software is provided 'as-is', without any express or implied
10 |   warranty.  In no event will the authors be held liable for any damages
11 |   arising from the use of this software.
12 | 
13 |   Permission is granted to anyone to use this software for any purpose,
14 |   including commercial applications, and to alter it and redistribute it
15 |   freely, subject to the following restrictions:
16 | 
17 |   1. The origin of this software must not be misrepresented; you must not
18 |      claim that you wrote the original software. If you use this software
19 |      in a product, an acknowledgment in the product documentation would be
20 |      appreciated but is not required.
21 |   2. Altered source versions must be plainly marked as such, and must not be
22 |      misrepresented as being the original software.
23 |   3. This notice may not be removed or altered from any source distribution.
24 | 
25 |   (this is the zlib license)
26 | */
27 | 
28 | #if defined(__ARM_NEON__) || defined(__ARM_NEON) // either macro enables the header
29 | #ifndef NEON_MATHFUN_H_
30 | #define NEON_MATHFUN_H_
31 | 
32 | #include <arm_neon.h>
33 | 
34 | typedef float32x4_t v4sf; // vector of 4 float
35 | 
36 | // prototypes (the included "neon_mathfun.hxx" below presumably holds the definitions)
37 | inline v4sf log_ps(v4sf x);
38 | inline v4sf exp_ps(v4sf x);
39 | inline v4sf sin_ps(v4sf x);
40 | inline v4sf cos_ps(v4sf x);
41 | inline void sincos_ps(v4sf x, v4sf *s, v4sf *c); // presumably stores sin(x) in *s and cos(x) in *c -- confirm in the .hxx
42 | 
43 | #include "neon_mathfun.hxx"
44 | 
45 | #endif // NEON_MATHFUN_H_
46 | #endif // __ARM_NEON__ || __ARM_NEON


--------------------------------------------------------------------------------
/include/math/sse_mathfun.h:
--------------------------------------------------------------------------------
 1 | /* SIMD (SSE1+MMX or SSE2) implementation of sin, cos, exp and log
 2 | 
 3 |    Inspired by Intel Approximate Math library, and based on the
 4 |    corresponding algorithms of the cephes math library
 5 | 
 6 |    The default is to use the SSE1 version. If you define USE_SSE2,
 7 |    the SSE2 intrinsics will be used in place of the MMX intrinsics. Do
 8 |    not expect any significant performance improvement with SSE2.
 9 | */
10 | 
11 | /* Copyright (C) 2007  Julien Pommier
12 | 
13 |   This software is provided 'as-is', without any express or implied
14 |   warranty.  In no event will the authors be held liable for any damages
15 |   arising from the use of this software.
16 | 
17 |   Permission is granted to anyone to use this software for any purpose,
18 |   including commercial applications, and to alter it and redistribute it
19 |   freely, subject to the following restrictions:
20 | 
21 |   1. The origin of this software must not be misrepresented; you must not
22 |      claim that you wrote the original software. If you use this software
23 |      in a product, an acknowledgment in the product documentation would be
24 |      appreciated but is not required.
25 |   2. Altered source versions must be plainly marked as such, and must not be
26 |      misrepresented as being the original software.
27 |   3. This notice may not be removed or altered from any source distribution.
28 | 
29 |   (this is the zlib license)
30 | */
31 | 
32 | #ifdef __SSE__ // the whole header is compiled out unless __SSE__ is defined
33 | #ifndef SSE_MATHFUN_H_
34 | #define SSE_MATHFUN_H_
35 | 
36 | #include <xmmintrin.h>
37 | 
38 | typedef __m128 v4sf;  // vector of 4 float (sse1)
39 | 
40 | // prototypes (the included "sse_mathfun.hxx" below presumably holds the definitions)
41 | inline v4sf log_ps(v4sf x);
42 | inline v4sf exp_ps(v4sf x);
43 | inline v4sf sin_ps(v4sf x);
44 | inline v4sf cos_ps(v4sf x);
45 | inline void sincos_ps(v4sf x, v4sf *s, v4sf *c); // presumably stores sin(x) in *s and cos(x) in *c -- confirm in the .hxx
46 | 
47 | #include "sse_mathfun.hxx"
48 | 
49 | #endif // SSE_MATHFUN_H_
50 | #endif // __SSE__


--------------------------------------------------------------------------------
/include/mipp_scalar_op.h:
--------------------------------------------------------------------------------
 1 | #ifndef MIPP_SCALAR_OP_H_
 2 | #define MIPP_SCALAR_OP_H_
 3 | 
 4 | namespace mipp_scop // My Intrinsics Plus Plus SCalar OPerations
 5 | {
 6 | 	template <typename T>
 7 | 	inline T add(const T val1, const T val2); // declaration only; presumably val1 + val2 -- confirm in mipp_scalar_op.hxx
 8 | 
 9 | 	template <typename T>
10 | 	inline T sub(const T val1, const T val2); // presumably val1 - val2 -- confirm
11 | 
12 | 	template <typename T>
13 | 	inline T andb(const T val1, const T val2); // presumably a bitwise AND of the two operands -- confirm
14 | 
15 | 	template <typename T>
16 | 	inline T xorb(const T val1, const T val2); // presumably a bitwise XOR of the two operands -- confirm
17 | 
18 | 	template <typename T>
19 | 	inline T msb(const T val); // presumably extracts the most significant bit of val -- confirm
20 | 
21 | 	template <typename T>
22 | 	inline T div2(const T val); // presumably val divided by 2 -- confirm
23 | 
24 | 	template <typename T>
25 | 	inline T div4(const T val); // presumably val divided by 4 -- confirm
26 | 
27 | 	template <typename T>
28 | 	inline T rshift(const T val, const int n); // presumably val shifted right by n bits -- confirm
29 | 
30 | 	template <typename T>
31 | 	inline T lshift(const T val, const int n); // presumably val shifted left by n bits -- confirm
32 | 
33 | 	template <typename T>
34 | 	struct All_one_bits { // a struct wrapping a static factory rather than a free function template
35 | 		static inline T make(); // presumably returns a T with every bit set -- confirm
36 | 	};
37 | }
38 | 
39 | #include "mipp_scalar_op.hxx"
40 | 
41 | #endif /* MIPP_SCALAR_OP_H_ */
42 | 


--------------------------------------------------------------------------------
/mipp.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aff3ct/MIPP/75fc843637988f93ebea9a23d3f6f018c37a1970/mipp.jpg


--------------------------------------------------------------------------------
/tests/lib/Catch2/README.md:
--------------------------------------------------------------------------------
1 | Catch2 (v2.13.10) from [GitHub](https://github.com/catchorg/Catch2).


--------------------------------------------------------------------------------
/tests/src/arithmetic_operations/cdiv.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename T>
 10 | void test_reg_cdiv()
 11 | {
 12 | 	// mipp::cdiv<T> on raw register pairs vs. a scalar reference; val[0] = real parts, val[1] = imaginary parts.
 13 | 	const auto n = mipp::N<T>();
 14 | 	T num[2*n], den[2*n];
 15 | 	std::iota(num, num + 2*n, (T)1);
 16 | 	std::iota(den, den + 2*n, (T)1);
 17 | 
 18 | 	std::mt19937 rng; // default-constructed: the same permutations on every run
 19 | 	std::shuffle(num, num + 2*n, rng);
 20 | 	std::shuffle(den, den + 2*n, rng);
 21 | 
 22 | 	mipp::regx2 r1, r2;
 23 | 	r1.val[0] = mipp::load<T>(num);
 24 | 	r1.val[1] = mipp::load<T>(num + n);
 25 | 	r2.val[0] = mipp::load<T>(den);
 26 | 	r2.val[1] = mipp::load<T>(den + n);
 27 | 
 28 | 	mipp::regx2 r3 = mipp::cdiv<T>(r1, r2);
 29 | 
 30 | 	for (auto i = 0; i < n; i++)
 31 | 	{
 32 | 		const T a_re = num[i], a_im = num[n +i];
 33 | 		const T b_re = den[i], b_im = den[n +i];
 34 | 		const T norm = b_re * b_re + b_im * b_im;
 35 | 		// (a_re + j.a_im) / (b_re + j.b_im), split into its real and imaginary parts
 36 | 		const T res_re = (a_re * b_re + a_im * b_im) / norm;
 37 | 		const T res_im = (a_im * b_re - a_re * b_im) / norm;
 38 | 
 39 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1
 40 | 		REQUIRE(mipp::get<T>(r3.val[0], i) == Approx(res_re).epsilon(0.01));
 41 | 		REQUIRE(mipp::get<T>(r3.val[1], i) == Approx(res_im).epsilon(0.01));
 42 | #else
 43 | 		REQUIRE(mipp::get<T>(r3.val[0], i) == res_re);
 44 | 		REQUIRE(mipp::get<T>(r3.val[1], i) == res_im);
 45 | #endif
 46 | 	}
 47 | }
 48 | 
 49 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // low-level test is compiled out when MIPP_NO or MIPP_SVE_LS is defined
 50 | TEST_CASE("Complex division - mipp::reg", "[mipp::cdiv]")
 51 | {
 52 | #if defined(MIPP_64BIT) // the double section exists only when MIPP_64BIT is defined
 53 | 	SECTION("datatype = double") { test_reg_cdiv<double>(); }
 54 | #endif
 55 | 	SECTION("datatype = float") { test_reg_cdiv<float>(); }
 56 | }
 57 | #endif // !MIPP_NO && !MIPP_SVE_LS
 58 | 
 59 | template <typename T>
 60 | void test_Reg_cdiv()
 61 | {
 62 | 	// operator/ on mipp::Regx2<T> vs. a scalar reference; r3[0] = real parts, r3[1] = imaginary parts.
 63 | 	const auto n = mipp::N<T>();
 64 | 	T num[2*n], den[2*n];
 65 | 	std::iota(num, num + 2*n, (T)1);
 66 | 	std::iota(den, den + 2*n, (T)1);
 67 | 
 68 | 	std::mt19937 rng; // default-constructed: the same permutations on every run
 69 | 	std::shuffle(num, num + 2*n, rng);
 70 | 	std::shuffle(den, den + 2*n, rng);
 71 | 
 72 | 	mipp::Regx2<T> r1 = num;
 73 | 	mipp::Regx2<T> r2 = den;
 74 | 	mipp::Regx2<T> r3 = r1 / r2;
 75 | 
 76 | 	for (auto i = 0; i < n; i++)
 77 | 	{
 78 | 		const T a_re = num[i], a_im = num[n +i];
 79 | 		const T b_re = den[i], b_im = den[n +i];
 80 | 		const T norm = b_re * b_re + b_im * b_im;
 81 | 		const T res_re = (a_re * b_re + a_im * b_im) / norm;
 82 | 		const T res_im = (a_im * b_re - a_re * b_im) / norm;
 83 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1
 84 | 		REQUIRE(r3[0][i] == Approx(res_re).epsilon(0.01));
 85 | 		REQUIRE(r3[1][i] == Approx(res_im).epsilon(0.01));
 86 | #else
 87 | 		REQUIRE(r3[0][i] == res_re);
 88 | 		REQUIRE(r3[1][i] == res_im);
 89 | #endif
 90 | 	}
 91 | }
 92 | 
 93 | #if !defined(MIPP_SVE_LS) // object-level test is compiled out only when MIPP_SVE_LS is defined
 94 | TEST_CASE("Complex division - mipp::Reg", "[mipp::cdiv]")
 95 | {
 96 | #if defined(MIPP_64BIT) // the double section exists only when MIPP_64BIT is defined
 97 | 	SECTION("datatype = double") { test_Reg_cdiv<double>(); }
 98 | #endif
 99 | 	SECTION("datatype = float") { test_Reg_cdiv<float>(); }
100 | }
101 | #endif // !MIPP_SVE_LS
102 | 


--------------------------------------------------------------------------------
/tests/src/arithmetic_operations/cmul.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename T>
 10 | void test_reg_cmul()
 11 | {
 12 | 	// mipp::cmul<T> on raw register pairs vs. a scalar reference; val[0] = real parts, val[1] = imaginary parts.
 13 | 	const auto n = mipp::N<T>();
 14 | 	T lhs[2*n], rhs[2*n];
 15 | 	std::iota(lhs, lhs + 2*n, (T)1);
 16 | 	std::iota(rhs, rhs + 2*n, (T)1);
 17 | 
 18 | 	std::mt19937 rng; // default-constructed: the same permutations on every run
 19 | 	std::shuffle(lhs, lhs + 2*n, rng);
 20 | 	std::shuffle(rhs, rhs + 2*n, rng);
 21 | 
 22 | 	mipp::regx2 r1, r2;
 23 | 	r1.val[0] = mipp::load<T>(lhs);
 24 | 	r1.val[1] = mipp::load<T>(lhs + n);
 25 | 	r2.val[0] = mipp::load<T>(rhs);
 26 | 	r2.val[1] = mipp::load<T>(rhs + n);
 27 | 
 28 | 	mipp::regx2 r3 = mipp::cmul<T>(r1, r2);
 29 | 
 30 | 	for (auto i = 0; i < n; i++)
 31 | 	{
 32 | 		const T a_re = lhs[i], a_im = lhs[n +i];
 33 | 		const T b_re = rhs[i], b_im = rhs[n +i];
 34 | 		const T res_re = a_re * b_re - a_im * b_im;
 35 | 		const T res_im = a_re * b_im + a_im * b_re;
 36 | 
 37 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1
 38 | 		REQUIRE(mipp::get<T>(r3.val[0], i) == Approx(res_re));
 39 | 		REQUIRE(mipp::get<T>(r3.val[1], i) == Approx(res_im));
 40 | #else
 41 | 		REQUIRE(mipp::get<T>(r3.val[0], i) == res_re);
 42 | 		REQUIRE(mipp::get<T>(r3.val[1], i) == res_im);
 43 | #endif
 44 | 	}
 45 | }
 46 | 
 47 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // low-level test is compiled out when MIPP_NO or MIPP_SVE_LS is defined
 48 | TEST_CASE("Complex multiplication - mipp::reg", "[mipp::cmul]")
 49 | {
 50 | #if defined(MIPP_64BIT) // the double section exists only when MIPP_64BIT is defined
 51 | 	SECTION("datatype = double") { test_reg_cmul<double>(); }
 52 | #endif
 53 | 	SECTION("datatype = float") { test_reg_cmul<float>(); }
 54 | 
 55 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) // integer sections: skipped on AVX with instruction version < 2
 56 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) // int32_t: skipped on SSE with instruction version < 41
 57 | 	SECTION("datatype = int32_t") { test_reg_cmul<int32_t>(); }
 58 | #endif
 59 | #if defined(MIPP_BW) // int16_t additionally requires MIPP_BW
 60 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 2) // int16_t: skipped on SSE with instruction version < 2
 61 | 	SECTION("datatype = int16_t") { test_reg_cmul<int16_t>(); }
 62 | #endif
 63 | #endif
 64 | // #if defined(MIPP_NEON)
 65 | // 	SECTION("datatype = int8_t") { test_reg_cmul<int8_t>(); }
 66 | // #endif
 67 | #endif // integer sections
 68 | }
 69 | #endif // !MIPP_NO && !MIPP_SVE_LS
 70 | 
 71 | template <typename T>
 72 | void test_Reg_cmul()
 73 | {
 74 | 	// operator* on mipp::Regx2<T> vs. a scalar reference; r3[0] = real parts, r3[1] = imaginary parts.
 75 | 	const auto n = mipp::N<T>();
 76 | 	T lhs[2*n], rhs[2*n];
 77 | 	std::iota(lhs, lhs + 2*n, (T)1);
 78 | 	std::iota(rhs, rhs + 2*n, (T)1);
 79 | 	std::mt19937 rng; // default-constructed: the same permutations on every run
 80 | 	std::shuffle(lhs, lhs + 2*n, rng);
 81 | 	std::shuffle(rhs, rhs + 2*n, rng);
 82 | 
 83 | 	mipp::Regx2<T> r1 = lhs;
 84 | 	mipp::Regx2<T> r2 = rhs;
 85 | 	mipp::Regx2<T> r3 = r1 * r2;
 86 | 
 87 | 	for (auto i = 0; i < n; i++)
 88 | 	{
 89 | 		const T a_re = lhs[i], a_im = lhs[n +i];
 90 | 		const T b_re = rhs[i], b_im = rhs[n +i];
 91 | 		const T res_re = a_re * b_re - a_im * b_im;
 92 | 		const T res_im = a_re * b_im + a_im * b_re;
 93 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1
 94 | 		REQUIRE(r3[0][i] == Approx(res_re));
 95 | 		REQUIRE(r3[1][i] == Approx(res_im));
 96 | #else
 97 | 		REQUIRE(r3[0][i] == res_re);
 98 | 		REQUIRE(r3[1][i] == res_im);
 99 | #endif
100 | 	}
101 | }
102 | 
103 | #if !defined(MIPP_SVE_LS) // object-level test is compiled out only when MIPP_SVE_LS is defined
104 | TEST_CASE("Complex multiplication - mipp::Reg", "[mipp::cmul]")
105 | {
106 | #if defined(MIPP_64BIT) // the double section exists only when MIPP_64BIT is defined
107 | 	SECTION("datatype = double") { test_Reg_cmul<double>(); }
108 | #endif
109 | 	SECTION("datatype = float") { test_Reg_cmul<float>(); }
110 | 
111 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) // integer sections: skipped on AVX with instruction version < 2
112 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) // int32_t: skipped on SSE with instruction version < 41
113 | 	SECTION("datatype = int32_t") { test_Reg_cmul<int32_t>(); }
114 | #endif
115 | #if defined(MIPP_BW) // int16_t additionally requires MIPP_BW
116 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 2) // int16_t: skipped on SSE with instruction version < 2
117 | 	SECTION("datatype = int16_t") { test_Reg_cmul<int16_t>(); }
118 | #endif
119 | #endif
120 | // #if defined(MIPP_NEON)
121 | // 	SECTION("datatype = int8_t") { test_Reg_cmul<int8_t>(); }
122 | // #endif
123 | #endif // integer sections
124 | }
125 | #endif // !MIPP_SVE_LS
126 | 


--------------------------------------------------------------------------------
/tests/src/arithmetic_operations/cmulconj.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename T>
 10 | void test_reg_cmulconj()
 11 | {
 12 | 	// Split layout: the first N<T>() entries act as real parts, the next N<T>() as imaginary parts.
 13 | 	constexpr int n = mipp::N<T>();
 14 | 	T lhs[2*n], rhs[2*n];
 15 | 
 16 | 	std::iota(lhs, lhs + 2*n, (T)1);
 17 | 	std::iota(rhs, rhs + 2*n, (T)1);
 18 | 
 19 | 	std::mt19937 rng; // default-constructed, so every run shuffles identically
 20 | 	std::shuffle(lhs, lhs + 2*n, rng);
 21 | 	std::shuffle(rhs, rhs + 2*n, rng);
 22 | 
 23 | 	mipp::regx2 r1, r2;
 24 | 	r1.val[0] = mipp::load<T>(lhs);
 25 | 	r1.val[1] = mipp::load<T>(lhs + n);
 26 | 	r2.val[0] = mipp::load<T>(rhs);
 27 | 	r2.val[1] = mipp::load<T>(rhs + n);
 28 | 
 29 | 	// Operation under test; it is compared lane by lane with the scalar reference below.
 30 | 	mipp::regx2 r3 = mipp::cmulconj<T>(r1, r2);
 31 | 
 32 | 	for (int i = 0; i < n; ++i)
 33 | 	{
 34 | 		// Scalar reference: (a + ib) * conj(c + id) = (ac + bd) + i(bc - ad).
 35 | 		const T a = lhs[i], b = lhs[n + i], c = rhs[i], d = rhs[n + i];
 36 | 		T res_re = a * c + b * d, res_im = b * c - a * d;
 37 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1
 38 | 		REQUIRE(mipp::get<T>(r3.val[0], i) == Approx(res_re));
 39 | 		REQUIRE(mipp::get<T>(r3.val[1], i) == Approx(res_im));
 40 | #else
 41 | 		REQUIRE(mipp::get<T>(r3.val[0], i) == res_re);
 42 | 		REQUIRE(mipp::get<T>(r3.val[1], i) == res_im);
 43 | #endif
 44 | 	}
 45 | }
 46 | 
 47 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS)) // compiled out when either macro is defined
 48 | TEST_CASE("Complex multiplication conjugate - mipp::reg", "[mipp::cmulconj]")
 49 | {
 50 | #ifdef MIPP_64BIT
 51 | 	SECTION("datatype = double") { test_reg_cmulconj<double>(); }
 52 | #endif // MIPP_64BIT
 53 | 	SECTION("datatype = float") { test_reg_cmulconj<float>(); }
 54 | 	// Integer sections: availability depends on the instruction-set macros tested below.
 55 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
 56 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
 57 | 	SECTION("datatype = int32_t") { test_reg_cmulconj<int32_t>(); }
 58 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
 59 | #ifdef MIPP_BW
 60 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 2
 61 | 	SECTION("datatype = int16_t") { test_reg_cmulconj<int16_t>(); }
 62 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 2
 63 | #endif // MIPP_BW
 64 | // NOTE: the int8_t section below is commented out (its guard was MIPP_NEON).
 65 | // 	SECTION("datatype = int8_t") { test_reg_cmulconj<int8_t>(); }
 66 | // (end of the disabled int8_t section)
 67 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
 68 | }
 69 | #endif // !(MIPP_NO || MIPP_SVE_LS)
 70 | 
 71 | template <typename T>
 72 | void test_Reg_cmulconj()
 73 | {
 74 | 	// Split layout: the first N<T>() entries act as real parts, the next N<T>() as imaginary parts.
 75 | 	constexpr int n = mipp::N<T>();
 76 | 	T lhs[2*n], rhs[2*n];
 77 | 	std::iota(lhs, lhs + 2*n, (T)1);
 78 | 	std::iota(rhs, rhs + 2*n, (T)1);
 79 | 
 80 | 	std::mt19937 rng; // default-constructed, so every run shuffles identically
 81 | 	std::shuffle(lhs, lhs + 2*n, rng);
 82 | 	std::shuffle(rhs, rhs + 2*n, rng);
 83 | 
 84 | 	mipp::Regx2<T> r1 = lhs;
 85 | 	mipp::Regx2<T> r2 = rhs;
 86 | 	mipp::Regx2<T> r3 = mipp::cmulconj(r1, r2);
 87 | 
 88 | 	for (int i = 0; i < n; ++i)
 89 | 	{
 90 | 		// Scalar reference: (a + ib) * conj(c + id) = (ac + bd) + i(bc - ad).
 91 | 		const T a = lhs[i], b = lhs[n + i], c = rhs[i], d = rhs[n + i];
 92 | 		T res_re = a * c + b * d, res_im = b * c - a * d;
 93 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1
 94 | 		REQUIRE(r3[0][i] == Approx(res_re));
 95 | 		REQUIRE(r3[1][i] == Approx(res_im));
 96 | #else
 97 | 		REQUIRE(r3[0][i] == res_re);
 98 | 		REQUIRE(r3[1][i] == res_im);
 99 | #endif
100 | 	}
101 | }
102 | 
103 | #ifndef MIPP_SVE_LS // the whole test case is compiled out when MIPP_SVE_LS is defined
104 | TEST_CASE("Complex multiplication conjugate - mipp::Reg", "[mipp::cmulconj]")
105 | {
106 | #ifdef MIPP_64BIT
107 | 	SECTION("datatype = double") { test_Reg_cmulconj<double>(); }
108 | #endif // MIPP_64BIT
109 | 	SECTION("datatype = float") { test_Reg_cmulconj<float>(); }
110 | 	// Integer sections: availability depends on the instruction-set macros tested below.
111 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
112 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
113 | 	SECTION("datatype = int32_t") { test_Reg_cmulconj<int32_t>(); }
114 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
115 | #ifdef MIPP_BW
116 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 2
117 | 	SECTION("datatype = int16_t") { test_Reg_cmulconj<int16_t>(); }
118 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 2
119 | #endif // MIPP_BW
120 | // NOTE: the int8_t section below is commented out (its guard was MIPP_NEON).
121 | // 	SECTION("datatype = int8_t") { test_Reg_cmulconj<int8_t>(); }
122 | // (end of the disabled int8_t section)
123 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
124 | }
125 | #endif // !MIPP_SVE_LS
126 | 


--------------------------------------------------------------------------------
/tests/src/arithmetic_operations/conj.cpp:
--------------------------------------------------------------------------------
 1 | #include <exception>
 2 | #include <algorithm>
 3 | #include <numeric>
 4 | #include <random>
 5 | #include <cmath>
 6 | #include <mipp.h>
 7 | #include <catch.hpp>
 8 | 
 9 | template <typename T>
10 | void test_reg_conj()
11 | {
12 | 	// Split layout: the first N<T>() entries act as real parts, the next N<T>() as imaginary parts.
13 | 	constexpr int n = mipp::N<T>();
14 | 	T samples[2*n];
15 | 	std::iota(samples, samples + 2*n, (T)0);
16 | 
17 | 	std::mt19937 rng; // default-constructed, so every run shuffles identically
18 | 	std::shuffle(samples, samples + 2*n, rng);
19 | 
20 | 	mipp::regx2 r1;
21 | 	r1.val[0] = mipp::load<T>(samples);
22 | 	r1.val[1] = mipp::load<T>(samples + n);
23 | 
24 | 	mipp::regx2 r2 = mipp::conj<T>(r1);
25 | 
26 | 	for (int i = 0; i < n; ++i)
27 | 	{
28 | 		// Expected result: real part unchanged, imaginary part negated.
29 | 		T res_re = samples[i];
30 | 		T res_im = -samples[n + i];
31 | 		REQUIRE(mipp::get<T>(r2.val[0], i) == res_re);
32 | 		REQUIRE(mipp::get<T>(r2.val[1], i) == res_im);
33 | 	}
34 | }
35 | 
36 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS)) // compiled out when either macro is defined
37 | TEST_CASE("Complex conjugate - mipp::reg", "[mipp::conj]")
38 | {
39 | #ifdef MIPP_64BIT
40 | 	SECTION("datatype = double") { test_reg_conj<double>(); }
41 | #endif // MIPP_64BIT
42 | 	SECTION("datatype = float") { test_reg_conj<float>(); }
43 | 	// Integer sections: availability depends on the instruction-set macros tested below.
44 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
45 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 2
46 | 	SECTION("datatype = int32_t") { test_reg_conj<int32_t>(); }
47 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 2
48 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
49 | #ifdef MIPP_BW
50 | 	SECTION("datatype = int16_t") { test_reg_conj<int16_t>(); }
51 | 	SECTION("datatype = int8_t") { test_reg_conj<int8_t>(); }
52 | #endif // MIPP_BW
53 | }
54 | #endif // !(MIPP_NO || MIPP_SVE_LS)
55 | 
56 | template <typename T>
57 | void test_Reg_conj()
58 | {
59 | 	// Split layout: the first N<T>() entries act as real parts, the next N<T>() as imaginary parts.
60 | 	constexpr int n = mipp::N<T>();
61 | 	T samples[2*n];
62 | 	std::iota(samples, samples + 2*n, (T)0);
63 | 	std::mt19937 rng; // default-constructed, so every run shuffles identically
64 | 	std::shuffle(samples, samples + 2*n, rng);
65 | 
66 | 	mipp::Regx2<T> r1 = samples;
67 | 	mipp::Regx2<T> r2 = mipp::conj(r1);
68 | 
69 | 	for (int i = 0; i < n; ++i)
70 | 	{
71 | 		// Expected result: real part unchanged, imaginary part negated.
72 | 		T res_re = samples[i];
73 | 		T res_im = -samples[n + i];
74 | 		REQUIRE(r2[0][i] == res_re);
75 | 		REQUIRE(r2[1][i] == res_im);
76 | 	}
77 | }
78 | 
79 | #ifndef MIPP_SVE_LS // the whole test case is compiled out when MIPP_SVE_LS is defined
80 | TEST_CASE("Complex conjugate - mipp::Reg", "[mipp::conj]")
81 | {
82 | #ifdef MIPP_64BIT
83 | 	SECTION("datatype = double") { test_Reg_conj<double>(); }
84 | #endif // MIPP_64BIT
85 | 	SECTION("datatype = float") { test_Reg_conj<float>(); }
86 | 	// Integer sections: availability depends on the instruction-set macros tested below.
87 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
88 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 2
89 | 	SECTION("datatype = int32_t") { test_Reg_conj<int32_t>(); }
90 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 2
91 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
92 | #ifdef MIPP_BW
93 | 	SECTION("datatype = int16_t") { test_Reg_conj<int16_t>(); }
94 | 	SECTION("datatype = int8_t") { test_Reg_conj<int8_t>(); }
95 | #endif // MIPP_BW
96 | }
97 | #endif // !MIPP_SVE_LS
98 | 


--------------------------------------------------------------------------------
/tests/src/arithmetic_operations/cvt.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename T1, typename T2>
 10 | void test_reg_cvt()
 11 | {
 12 | 	constexpr int N1 = mipp::N<T1>(), N2 = mipp::N<T2>();
 13 | 
 14 | 	// Ramp starting at (T1)-N1/2, then +0.6 on even / +0.4 on odd indices (both offsets are 0 for an integer T1).
 15 | 	T1 src[N1];
 16 | 	std::iota(src, src + N1, (T1)-N1/2);
 17 | 	for (int k = 0; k < N1; ++k)
 18 | 		src[k] += (k % 2 == 0) ? (T1)0.6 : (T1)0.4;
 19 | 
 20 | 	// When the lane counts differ, only the low part of r1 is handed to the conversion.
 21 | 	mipp::reg r1 = mipp::load<T1>(src);
 22 | 	mipp::reg r2 = (N1 == N2) ? mipp::cvt<T1,T2>(r1) : mipp::cvt<T1,T2>(mipp::low<T1>(r1));
 23 | 
 24 | 	for (int i = 0; i < N2; ++i)
 25 | 	{
 26 | 		auto res = static_cast<T2>(std::round(src[i])); // scalar reference: round, then cast
 27 | 		REQUIRE(mipp::get<T2>(r2, i) == res);
 28 | 	}
 29 | }
 30 | 
 31 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS)) // compiled out when either macro is defined
 32 | TEST_CASE("Convert - mipp::reg", "[mipp::cvt]")
 33 | {
 34 | #ifdef MIPP_64BIT
 35 | #if !defined(MIPP_AVX512) || defined(__AVX512DQ__)
 36 | #if !(defined(MIPP_SSE) || defined(MIPP_AVX))
 37 | 	SECTION("datatype = int64_t -> double") { test_reg_cvt<int64_t,double>(); }
 38 | 	SECTION("datatype = double -> int64_t") { test_reg_cvt<double,int64_t>(); }
 39 | #endif // neither MIPP_SSE nor MIPP_AVX
 40 | #endif // !MIPP_AVX512 || __AVX512DQ__
 41 | #endif // MIPP_64BIT
 42 | 	SECTION("datatype = int32_t -> float") { test_reg_cvt<int32_t,float>(); }
 43 | 	SECTION("datatype = float -> int32_t") { test_reg_cvt<float,int32_t>(); }
 44 | 	// Integer-to-wider-integer sections: gated by the instruction-set macros tested below.
 45 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
 46 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
 47 | #ifdef MIPP_64BIT
 48 | 	SECTION("datatype = int32_t -> int64_t") { test_reg_cvt<int32_t,int64_t>(); }
 49 | #endif // MIPP_64BIT
 50 | #ifdef MIPP_BW
 51 | 	SECTION("datatype = int16_t -> int32_t") { test_reg_cvt<int16_t,int32_t>(); }
 52 | 	SECTION("datatype = int8_t -> int16_t") { test_reg_cvt<int8_t,int16_t>(); }
 53 | #endif // MIPP_BW
 54 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
 55 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
 56 | }
 57 | #endif // !(MIPP_NO || MIPP_SVE_LS)
 58 | 
 59 | template <typename T1, typename T2>
 60 | void test_Reg_cvt()
 61 | {
 62 | 	constexpr int N1 = mipp::N<T1>(), N2 = mipp::N<T2>();
 63 | 
 64 | 	// Ramp starting at (T1)-N1/2, then +0.6 on even / +0.4 on odd indices (both offsets are 0 for an integer T1).
 65 | 	T1 src[N1];
 66 | 	std::iota(src, src + N1, (T1)-N1/2);
 67 | 	for (int k = 0; k < N1; ++k)
 68 | 		src[k] += (k % 2 == 0) ? (T1)0.6 : (T1)0.4;
 69 | 
 70 | 	// When the lane counts differ, only r1.low() is handed to the conversion.
 71 | 	mipp::Reg<T1> r1 = src;
 72 | 	mipp::Reg<T2> r2 = (N1 == N2) ? mipp::cvt<T1,T2>(r1) : mipp::cvt<T1,T2>(r1.low());
 73 | 
 74 | 	for (int i = 0; i < N2; ++i)
 75 | 	{
 76 | 		auto res = static_cast<T2>(std::round(src[i])); // scalar reference: round, then cast
 77 | 		REQUIRE(r2[i] == res);
 78 | 	}
 79 | }
 80 | 
 81 | #ifndef MIPP_SVE_LS // the whole test case is compiled out when MIPP_SVE_LS is defined
 82 | TEST_CASE("Convert - mipp::Reg", "[mipp::cvt]")
 83 | {
 84 | #ifdef MIPP_64BIT
 85 | #if !defined(MIPP_AVX512) || defined(__AVX512DQ__)
 86 | #if !(defined(MIPP_SSE) || defined(MIPP_AVX))
 87 | 	SECTION("datatype = int64_t -> double") { test_Reg_cvt<int64_t,double>(); }
 88 | 	SECTION("datatype = double -> int64_t") { test_Reg_cvt<double,int64_t>(); }
 89 | #endif // neither MIPP_SSE nor MIPP_AVX
 90 | #endif // !MIPP_AVX512 || __AVX512DQ__
 91 | #endif // MIPP_64BIT
 92 | 	SECTION("datatype = int32_t -> float") { test_Reg_cvt<int32_t,float>(); }
 93 | 	SECTION("datatype = float -> int32_t") { test_Reg_cvt<float,int32_t>(); }
 94 | 	// Integer-to-wider-integer sections: gated by the instruction-set macros tested below.
 95 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
 96 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
 97 | #ifdef MIPP_64BIT
 98 | 	SECTION("datatype = int32_t -> int64_t") { test_Reg_cvt<int32_t,int64_t>(); }
 99 | #endif // MIPP_64BIT
100 | #ifdef MIPP_BW
101 | 	SECTION("datatype = int16_t -> int32_t") { test_Reg_cvt<int16_t,int32_t>(); }
102 | 	SECTION("datatype = int8_t -> int16_t") { test_Reg_cvt<int8_t,int16_t>(); }
103 | #endif // MIPP_BW
104 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
105 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
106 | }
107 | #endif // !MIPP_SVE_LS
108 | 


--------------------------------------------------------------------------------
/tests/src/arithmetic_operations/norm.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename T>
 10 | void test_reg_norm()
 11 | {
 12 | 	// Split layout: the first N<T>() entries act as real parts, the next N<T>() as imaginary parts.
 13 | 	constexpr int n = mipp::N<T>();
 14 | 	T samples[2*n];
 15 | 	std::iota(samples, samples + 2*n, (T)1);
 16 | 	std::mt19937 rng; // default-constructed, so every run shuffles identically
 17 | 	std::shuffle(samples, samples + 2*n, rng);
 18 | 
 19 | 	mipp::regx2 r1;
 20 | 	r1.val[0] = mipp::load<T>(samples);
 21 | 	r1.val[1] = mipp::load<T>(samples + n);
 22 | 
 23 | 	mipp::reg r2 = mipp::norm<T>(r1);
 24 | 
 25 | 	for (int i = 0; i < n; ++i)
 26 | 	{
 27 | 		// Scalar reference: re*re + im*im (no square root is taken).
 28 | 		const T re = samples[i], im = samples[n + i];
 29 | 		T res = re * re + im * im;
 30 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1
 31 | 		REQUIRE(mipp::get<T>(r2, i) == Approx(res));
 32 | #else
 33 | 		REQUIRE(mipp::get<T>(r2, i) == res);
 34 | #endif
 35 | 	}
 36 | }
 37 | 
 38 | #if !defined(MIPP_NO) // the whole test case is compiled out when MIPP_NO is defined
 39 | TEST_CASE("Complex norm - mipp::reg", "[mipp::norm]")
 40 | {
 41 | #ifdef MIPP_64BIT
 42 | 	SECTION("datatype = double") { test_reg_norm<double>(); }
 43 | #endif // MIPP_64BIT
 44 | 	SECTION("datatype = float") { test_reg_norm<float>(); }
 45 | 	// Integer sections: availability depends on the instruction-set macros tested below.
 46 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
 47 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
 48 | 	SECTION("datatype = int32_t") { test_reg_norm<int32_t>(); }
 49 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
 50 | #ifdef MIPP_BW
 51 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 2
 52 | 	SECTION("datatype = int16_t") { test_reg_norm<int16_t>(); }
 53 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 2
 54 | #endif // MIPP_BW
 55 | // NOTE: the int8_t section below is commented out (its guard was MIPP_NEON).
 56 | // 	SECTION("datatype = int8_t") { test_reg_norm<int8_t>(); }
 57 | // (end of the disabled int8_t section)
 58 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
 59 | }
 60 | #endif // !MIPP_NO
 61 | 
 62 | template <typename T>
 63 | void test_Reg_norm()
 64 | {
 65 | 	// Split layout: the first N<T>() entries act as real parts, the next N<T>() as imaginary parts.
 66 | 	constexpr int n = mipp::N<T>();
 67 | 	T samples[2*n];
 68 | 	std::iota(samples, samples + 2*n, (T)1);
 69 | 	std::mt19937 rng; // default-constructed, so every run shuffles identically
 70 | 	std::shuffle(samples, samples + 2*n, rng);
 71 | 
 72 | 	mipp::Regx2<T> r1 = samples;
 73 | 	mipp::Reg<T> r2 = mipp::norm(r1);
 74 | 
 75 | 	for (int i = 0; i < n; ++i)
 76 | 	{
 77 | 		const T re = samples[i], im = samples[n + i];
 78 | 		T res = re * re + im * im; // scalar reference: squared magnitude, no square root
 79 | #if defined(MIPP_NEON) && MIPP_INSTR_VERSION == 1
 80 | 		REQUIRE(r2[i] == Approx(res));
 81 | #else
 82 | 		REQUIRE(r2[i] == res);
 83 | #endif
 84 | 	}
 85 | }
 86 | 
 87 | TEST_CASE("Complex norm - mipp::Reg", "[mipp::norm]")
 88 | {
 89 | #ifdef MIPP_64BIT
 90 | 	SECTION("datatype = double") { test_Reg_norm<double>(); }
 91 | #endif // MIPP_64BIT
 92 | 	SECTION("datatype = float") { test_Reg_norm<float>(); }
 93 | 	// Integer sections: availability depends on the instruction-set macros tested below.
 94 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
 95 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
 96 | 	SECTION("datatype = int32_t") { test_Reg_norm<int32_t>(); }
 97 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
 98 | #ifdef MIPP_BW
 99 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 2
100 | 	SECTION("datatype = int16_t") { test_Reg_norm<int16_t>(); }
101 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 2
102 | #endif // MIPP_BW
103 | // NOTE: the int8_t section below is commented out (its guard was MIPP_NEON).
104 | // 	SECTION("datatype = int8_t") { test_Reg_norm<int8_t>(); }
105 | // (end of the disabled int8_t section)
106 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
107 | }
108 | 


--------------------------------------------------------------------------------
/tests/src/arithmetic_operations/pack.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | #if !defined(MIPP_NO)
 10 | template <typename T1, typename T2>
 11 | void test_reg_pack()
 12 | {
 13 | 	constexpr int N1 = mipp::N<T1>(), N2 = mipp::N<T2>();
 14 | 	static_assert(N1 < N2, "N1 has to be smaller than N2.");
 15 | 
 16 | 	// 'high' counts up from max(T1) - N1, 'ramp' counts up from (T1)-N1/2; both are then shuffled.
 17 | 	T1 high[N1], ramp[N1];
 18 | 	std::iota(high, high + N1, std::numeric_limits<T1>::max() - (T1)N1);
 19 | 	std::iota(ramp, ramp + N1, (T1)-N1/2);
 20 | 
 21 | 	std::mt19937 rng; // default-constructed, so every run shuffles identically
 22 | 	std::shuffle(high, high + N1, rng);
 23 | 	std::shuffle(ramp, ramp + N1, rng);
 24 | 
 25 | 	// r3 is checked below as: lanes [0, N1) from 'high', lanes [N1, 2*N1) from 'ramp'.
 26 | 	mipp::reg r1 = mipp::load<T1>(high);
 27 | 	mipp::reg r2 = mipp::load<T1>(ramp);
 28 | 	mipp::reg r3 = mipp::pack<T1,T2>(r1, r2);
 29 | 
 30 | 	// Scalar reference: clamp to the representable range of T2, then narrow.
 31 | 	const T1 lo = (T1)std::numeric_limits<T2>::min(), hi = (T1)std::numeric_limits<T2>::max();
 32 | 	for (int i = 0; i < N1; ++i)
 33 | 	{
 34 | 		auto res = static_cast<T2>(std::min<T1>(std::max<T1>(high[i], lo), hi));
 35 | 		REQUIRE(mipp::get<T2>(r3, i) == res);
 36 | 	}
 37 | 
 38 | 	for (int i = 0; i < N1; ++i)
 39 | 	{
 40 | 		auto res = static_cast<T2>(std::min<T1>(std::max<T1>(ramp[i], lo), hi));
 41 | 		REQUIRE(mipp::get<T2>(r3, N1 +i) == res);
 42 | 	}
 43 | }
 44 | #endif // !MIPP_NO
 45 | 
 46 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS)) // compiled out when either macro is defined
 47 | TEST_CASE("Pack - mipp::reg", "[mipp::pack]")
 48 | {
 49 | #ifdef MIPP_BW
 50 | 	SECTION("datatype = int32_t -> int16_t") { test_reg_pack<int32_t,int16_t>(); }
 51 | 	SECTION("datatype = int16_t -> int8_t") { test_reg_pack<int16_t,int8_t>(); }
 52 | #endif // MIPP_BW
 53 | }
 54 | #endif // !(MIPP_NO || MIPP_SVE_LS)
 55 | 
 56 | #if !defined(MIPP_NO)
 57 | template <typename T1, typename T2>
 58 | void test_Reg_pack()
 59 | {
 60 | 	constexpr int N1 = mipp::N<T1>(), N2 = mipp::N<T2>();
 61 | 	static_assert(N1 < N2, "N1 has to be smaller than N2.");
 62 | 
 63 | 	// 'high' counts up from max(T1) - N1, 'ramp' counts up from (T1)-N1/2; both are then shuffled.
 64 | 	T1 high[N1], ramp[N1];
 65 | 	std::iota(high, high + N1, std::numeric_limits<T1>::max() - (T1)N1);
 66 | 	std::iota(ramp, ramp + N1, (T1)-N1/2);
 67 | 
 68 | 	std::mt19937 rng; // default-constructed, so every run shuffles identically
 69 | 	std::shuffle(high, high + N1, rng);
 70 | 	std::shuffle(ramp, ramp + N1, rng);
 71 | 
 72 | 	// r3 is checked below as: lanes [0, N1) from 'high', lanes [N1, 2*N1) from 'ramp'.
 73 | 	mipp::Reg<T1> r1 = high;
 74 | 	mipp::Reg<T1> r2 = ramp;
 75 | 	mipp::Reg<T2> r3 = mipp::pack<T1,T2>(r1, r2);
 76 | 
 77 | 	// Scalar reference: clamp to the representable range of T2, then narrow.
 78 | 	const T1 lo = (T1)std::numeric_limits<T2>::min(), hi = (T1)std::numeric_limits<T2>::max();
 79 | 	for (int i = 0; i < N1; ++i)
 80 | 	{
 81 | 		auto res = static_cast<T2>(std::min<T1>(std::max<T1>(high[i], lo), hi));
 82 | 		REQUIRE(r3[i] == res);
 83 | 	}
 84 | 
 85 | 	for (int i = 0; i < N1; ++i)
 86 | 	{
 87 | 		auto res = static_cast<T2>(std::min<T1>(std::max<T1>(ramp[i], lo), hi));
 88 | 		REQUIRE(r3[N1 +i] == res);
 89 | 	}
 90 | }
 91 | #endif // !MIPP_NO
 92 | 
 93 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS)) // compiled out when either macro is defined
 94 | TEST_CASE("Pack - mipp::Reg", "[mipp::pack]")
 95 | {
 96 | #ifdef MIPP_BW
 97 | 	SECTION("datatype = int32_t -> int16_t") { test_Reg_pack<int32_t,int16_t>(); }
 98 | 	SECTION("datatype = int16_t -> int8_t") { test_Reg_pack<int16_t,int8_t>(); }
 99 | #endif // MIPP_BW
100 | }
101 | #endif // !(MIPP_NO || MIPP_SVE_LS)
102 | 


--------------------------------------------------------------------------------
/tests/src/arithmetic_operations/sat.cpp:
--------------------------------------------------------------------------------
  1 | #include <type_traits>
  2 | #include <exception>
  3 | #include <algorithm>
  4 | #include <numeric>
  5 | #include <random>
  6 | #include <cmath>
  7 | #include <mipp.h>
  8 | #include <catch.hpp>
  9 | 
 10 | template <typename T>
 11 | void test_reg_sat_int()
 12 | {
 13 | 	// std::uniform_int_distribution is only specified for short and wider integer types, so an
 14 | 	// 8-bit T is sampled through (unsigned) short; the bounds keep every draw representable in T.
 15 | 	using narrow_t = typename std::conditional<std::is_signed<T>::value, short, unsigned short>::type;
 16 | 	using draw_t = typename std::conditional<(sizeof(T) < sizeof(short)), narrow_t, T>::type;
 17 | 	constexpr int N = mipp::N<T>();
 18 | 	T inputs1[N];
 19 | 	std::mt19937 g;
 20 | 	std::uniform_int_distribution<draw_t> dis(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
 21 | 
 22 | 	for (auto t = 0; t < 100; t++) // 100 random trials; 't' is no longer shadowed by the lane index 'i'
 23 | 	{
 24 | 		for (auto i = 0; i < N; i++)
 25 | 			inputs1[i] = static_cast<T>(dis(g));
 26 | 		mipp::reg r1 = mipp::load<T>(inputs1);
 27 | 
 28 | 		// Two extra draws, ordered into the saturation interval [m, M].
 29 | 		T n1 = static_cast<T>(dis(g)), n2 = static_cast<T>(dis(g));
 30 | 		T m = std::min<T>(n1, n2), M = std::max<T>(n1, n2);
 31 | 		mipp::reg r2 = mipp::sat<T>(r1, m, M);
 32 | 
 33 | 		for (auto i = 0; i < N; i++)
 34 | 		{
 35 | 			T res = std::min<T>(std::max<T>(inputs1[i], m), M); // scalar reference clamp
 36 | 			REQUIRE(mipp::get<T>(r2, i) == res);
 37 | 		}
 38 | 	}
 39 | }
 40 | 
 41 | template <typename T>
 42 | void test_reg_sat_real()
 43 | {
 44 | 	constexpr int N = mipp::N<T>();
 45 | 	T inputs1[N];
 46 | 	std::mt19937 g;
 47 | 	// For floating-point T, numeric_limits<T>::min() is the smallest positive normal value, so a
 48 | 	// [min(), max()] range never yields negative samples. Sample [lowest()/2, max()/2] instead:
 49 | 	// both signs are covered and b - a stays <= max(), as std::uniform_real_distribution requires.
 50 | 	std::uniform_real_distribution<T> dis(std::numeric_limits<T>::lowest() / 2, std::numeric_limits<T>::max() / 2);
 51 | 
 52 | 	for (auto t = 0; t < 100; t++) // 100 random trials; 't' is no longer shadowed by the lane index 'i'
 53 | 	{
 54 | 		for (auto i = 0; i < N; i++)
 55 | 			inputs1[i] = dis(g);
 56 | 		mipp::reg r1 = mipp::load<T>(inputs1);
 57 | 
 58 | 		// Two extra draws, ordered into the saturation interval [m, M].
 59 | 		T n1 = dis(g);
 60 | 		T n2 = dis(g);
 61 | 		T m = std::min<T>(n1, n2), M = std::max<T>(n1, n2);
 62 | 		mipp::reg r2 = mipp::sat<T>(r1, m, M);
 63 | 
 64 | 		for (auto i = 0; i < N; i++)
 65 | 		{
 66 | 			T res = std::min<T>(std::max<T>(inputs1[i], m), M); // scalar reference clamp
 67 | 			REQUIRE(mipp::get<T>(r2, i) == res);
 68 | 		}
 69 | 	}
 70 | }
 71 | 
 72 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS)) // compiled out when either macro is defined
 73 | TEST_CASE("Saturation - mipp::reg", "[mipp::sat]")
 74 | {
 75 | #ifdef MIPP_64BIT
 76 | 	SECTION("datatype = double") { test_reg_sat_real<double>(); }
 77 | #endif // MIPP_64BIT
 78 | 	SECTION("datatype = float") { test_reg_sat_real<float>(); }
 79 | 	// Integer sections: availability depends on the instruction-set macros tested below.
 80 | #ifdef MIPP_64BIT
 81 | #if !(defined(MIPP_SSE) || defined(MIPP_AVX) || defined(MIPP_NEON))
 82 | 	SECTION("datatype = int64_t") { test_reg_sat_int<int64_t>(); }
 83 | 	SECTION("datatype = uint64_t") { test_reg_sat_int<uint64_t>(); }
 84 | #endif // none of MIPP_SSE, MIPP_AVX, MIPP_NEON
 85 | #endif // MIPP_64BIT
 86 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
 87 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
 88 | 	SECTION("datatype = int32_t") { test_reg_sat_int<int32_t>(); }
 89 | 	SECTION("datatype = uint32_t") { test_reg_sat_int<uint32_t>(); }
 90 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
 91 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
 92 | #ifdef MIPP_BW
 93 | 	SECTION("datatype = int16_t") { test_reg_sat_int<int16_t>(); }
 94 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
 95 | 	SECTION("datatype = uint16_t") { test_reg_sat_int<uint16_t>(); }
 96 | #if !defined(_MSC_VER)
 97 | 	SECTION("datatype = int8_t") { test_reg_sat_int<int8_t>(); }
 98 | 	SECTION("datatype = uint8_t") { test_reg_sat_int<uint8_t>(); }
 99 | #endif // !_MSC_VER
100 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
101 | #endif // MIPP_BW
102 | }
103 | #endif // !(MIPP_NO || MIPP_SVE_LS)
104 | 
105 | template <typename T>
106 | void test_Reg_sat_int()
107 | {
108 | 	// std::uniform_int_distribution is only specified for short and wider integer types, so an
109 | 	// 8-bit T is sampled through (unsigned) short; the bounds keep every draw representable in T.
110 | 	using narrow_t = typename std::conditional<std::is_signed<T>::value, short, unsigned short>::type;
111 | 	using draw_t = typename std::conditional<(sizeof(T) < sizeof(short)), narrow_t, T>::type;
112 | 	constexpr int N = mipp::N<T>();
113 | 	T inputs1[N];
114 | 	std::mt19937 g;
115 | 	std::uniform_int_distribution<draw_t> dis(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
116 | 
117 | 	for (auto t = 0; t < 100; t++) // 100 random trials; 't' is no longer shadowed by the lane index 'i'
118 | 	{
119 | 		for (auto i = 0; i < N; i++)
120 | 			inputs1[i] = static_cast<T>(dis(g));
121 | 		mipp::Reg<T> r1 = inputs1;
122 | 
123 | 		// Two extra draws, ordered into the saturation interval [m, M].
124 | 		T n1 = static_cast<T>(dis(g)), n2 = static_cast<T>(dis(g));
125 | 		T m = std::min<T>(n1, n2), M = std::max<T>(n1, n2);
126 | 		mipp::Reg<T> r2 = mipp::sat(r1, m, M);
127 | 
128 | 		for (auto i = 0; i < N; i++)
129 | 		{
130 | 			T res = std::min<T>(std::max<T>(inputs1[i], m), M); // scalar reference clamp
131 | 			REQUIRE(r2[i] == res);
132 | 		}
133 | 	}
134 | }
135 | 
136 | template <typename T>
137 | void test_Reg_sat_real()
138 | {
139 | 	constexpr int N = mipp::N<T>();
140 | 	T inputs1[N];
141 | 	std::mt19937 g;
142 | 	// For floating-point T, numeric_limits<T>::min() is the smallest positive normal value, so a
143 | 	// [min(), max()] range never yields negative samples. Sample [lowest()/2, max()/2] instead:
144 | 	// both signs are covered and b - a stays <= max(), as std::uniform_real_distribution requires.
145 | 	std::uniform_real_distribution<T> dis(std::numeric_limits<T>::lowest() / 2, std::numeric_limits<T>::max() / 2);
146 | 
147 | 	for (auto t = 0; t < 100; t++) // 100 random trials; 't' is no longer shadowed by the lane index 'i'
148 | 	{
149 | 		for (auto i = 0; i < N; i++)
150 | 			inputs1[i] = dis(g);
151 | 		mipp::Reg<T> r1 = inputs1;
152 | 
153 | 		// Two extra draws, ordered into the saturation interval [m, M].
154 | 		T n1 = dis(g);
155 | 		T n2 = dis(g);
156 | 		T m = std::min<T>(n1, n2), M = std::max<T>(n1, n2);
157 | 		mipp::Reg<T> r2 = mipp::sat(r1, m, M);
158 | 
159 | 		for (auto i = 0; i < N; i++)
160 | 		{
161 | 			T res = std::min<T>(std::max<T>(inputs1[i], m), M); // scalar reference clamp
162 | 			REQUIRE(r2[i] == res);
163 | 		}
164 | 	}
165 | }
166 | 
167 | #ifndef MIPP_SVE_LS // the whole test case is compiled out when MIPP_SVE_LS is defined
168 | TEST_CASE("Saturation - mipp::Reg", "[mipp::sat]")
169 | {
170 | #ifdef MIPP_64BIT
171 | 	SECTION("datatype = double") { test_Reg_sat_real<double>(); }
172 | #endif // MIPP_64BIT
173 | 	SECTION("datatype = float") { test_Reg_sat_real<float>(); }
174 | 	// Integer sections: availability depends on the instruction-set macros tested below.
175 | #ifdef MIPP_64BIT
176 | #if !(defined(MIPP_SSE) || defined(MIPP_AVX) || defined(MIPP_NEON))
177 | 	SECTION("datatype = int64_t") { test_Reg_sat_int<int64_t>(); }
178 | 	SECTION("datatype = uint64_t") { test_Reg_sat_int<uint64_t>(); }
179 | #endif // none of MIPP_SSE, MIPP_AVX, MIPP_NEON
180 | #endif // MIPP_64BIT
181 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
182 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
183 | 	SECTION("datatype = int32_t") { test_Reg_sat_int<int32_t>(); }
184 | 	SECTION("datatype = uint32_t") { test_Reg_sat_int<uint32_t>(); }
185 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
186 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
187 | #ifdef MIPP_BW
188 | 	SECTION("datatype = int16_t") { test_Reg_sat_int<int16_t>(); }
189 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
190 | 	SECTION("datatype = uint16_t") { test_Reg_sat_int<uint16_t>(); }
191 | #if !defined(_MSC_VER)
192 | 	SECTION("datatype = int8_t") { test_Reg_sat_int<int8_t>(); }
193 | 	SECTION("datatype = uint8_t") { test_Reg_sat_int<uint8_t>(); }
194 | #endif // !_MSC_VER
195 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
196 | #endif // MIPP_BW
197 | }
198 | #endif // !MIPP_SVE_LS
199 | 


--------------------------------------------------------------------------------
/tests/src/arithmetic_operations/sign.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename T>
 10 | void test_reg_sign()
 11 | {
 12 | 	// Shuffled ramp of N consecutive values starting at (T)-N/2.
 13 | 	constexpr int N = mipp::N<T>();
 14 | 	T values[N];
 15 | 	std::iota(values, values + N, (T)-N/2);
 16 | 	std::mt19937 rng; // default-constructed, so every run shuffles identically
 17 | 	std::shuffle(values, values + N, rng);
 18 | 	mipp::reg r1 = mipp::load<T>(values);
 19 | 	mipp::msk sign_mask = mipp::sign<T>(r1);
 20 | 	mipp::reg r2 = mipp::toreg<N>(sign_mask);
 21 | 
 22 | 	for (int i = 0; i < N; ++i)
 23 | 	{
 24 | 		// The mask, viewed as a register, must be non-zero exactly on the negative lanes.
 25 | 		if (values[i] < 0)
 26 | 			REQUIRE(mipp::get<T>(r2, i) != (T)0);
 27 | 		else
 28 | 			REQUIRE(mipp::get<T>(r2, i) == (T)0);
 29 | 	}
 30 | }
 31 | 
 32 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS)) // compiled out when either macro is defined
 33 | TEST_CASE("Sign - mipp::reg", "[mipp::sign]")
 34 | {
 35 | #ifdef MIPP_64BIT
 36 | 	SECTION("datatype = double") { test_reg_sign<double>(); }
 37 | #endif // MIPP_64BIT
 38 | 	SECTION("datatype = float") { test_reg_sign<float>(); }
 39 | 	// Integer sections: availability depends on the instruction-set macros tested below.
 40 | #ifdef MIPP_64BIT
 41 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
 42 | #ifndef MIPP_SSE
 43 | 	SECTION("datatype = int64_t") { test_reg_sign<int64_t>(); }
 44 | #endif // !MIPP_SSE
 45 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
 46 | #endif // MIPP_64BIT
 47 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
 48 | 	SECTION("datatype = int32_t") { test_reg_sign<int32_t>(); }
 49 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
 50 | #ifdef MIPP_BW
 51 | 	SECTION("datatype = int16_t") { test_reg_sign<int16_t>(); }
 52 | 	SECTION("datatype = int8_t") { test_reg_sign<int8_t>(); }
 53 | #endif // MIPP_BW
 54 | }
 55 | #endif // !(MIPP_NO || MIPP_SVE_LS)
 56 | 
 57 | template <typename T>
 58 | void test_Reg_sign()
 59 | {
 60 | 	// Shuffled ramp of N consecutive values starting at (T)-N/2.
 61 | 	constexpr int N = mipp::N<T>();
 62 | 	T values[N];
 63 | 	std::iota(values, values + N, (T)-N/2);
 64 | 	std::mt19937 rng; // default-constructed, so every run shuffles identically
 65 | 	std::shuffle(values, values + N, rng);
 66 | 	mipp::Reg<T> r1 = values;
 67 | 	mipp::Msk<N> sign_mask = mipp::sign(r1);
 68 | 	mipp::Reg<T> r2 = mipp::toreg<N>(sign_mask.m);
 69 | 
 70 | 	for (int i = 0; i < N; ++i)
 71 | 	{
 72 | 		// The mask, viewed as a register, must be non-zero exactly on the negative lanes.
 73 | 		if (values[i] < 0)
 74 | 			REQUIRE(r2[i] != (T)0);
 75 | 		else
 76 | 			REQUIRE(r2[i] == (T)0);
 77 | 	}
 78 | }
 79 | 
 80 | #ifndef MIPP_SVE_LS // the whole test case is compiled out when MIPP_SVE_LS is defined
 81 | TEST_CASE("Sign - mipp::Reg", "[mipp::sign]")
 82 | {
 83 | #ifdef MIPP_64BIT
 84 | 	SECTION("datatype = double") { test_Reg_sign<double>(); }
 85 | #endif // MIPP_64BIT
 86 | 	SECTION("datatype = float") { test_Reg_sign<float>(); }
 87 | 	// Integer sections: availability depends on the instruction-set macros tested below.
 88 | #ifdef MIPP_64BIT
 89 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
 90 | #ifndef MIPP_SSE
 91 | 	SECTION("datatype = int64_t") { test_Reg_sign<int64_t>(); }
 92 | #endif // !MIPP_SSE
 93 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
 94 | #endif // MIPP_64BIT
 95 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
 96 | 	SECTION("datatype = int32_t") { test_Reg_sign<int32_t>(); }
 97 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
 98 | #ifdef MIPP_BW
 99 | 	SECTION("datatype = int16_t") { test_Reg_sign<int16_t>(); }
100 | 	SECTION("datatype = int8_t") { test_Reg_sign<int8_t>(); }
101 | #endif // MIPP_BW
102 | }
103 | #endif // !MIPP_SVE_LS
104 | 


--------------------------------------------------------------------------------
/tests/src/bitwise_operations/andb.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename T>
 10 | void test_reg_andb()
 11 | {
 12 | 	// Two copies of the ramp 0 .. N<T>()-1, shuffled one after the other with the same engine.
 13 | 	constexpr int n = mipp::N<T>();
 14 | 	T lhs[n], rhs[n];
 15 | 	std::iota(lhs, lhs + n, (T)0);
 16 | 	std::iota(rhs, rhs + n, (T)0);
 17 | 	std::mt19937 rng; // default-constructed, so every run shuffles identically
 18 | 	std::shuffle(lhs, lhs + n, rng);
 19 | 	std::shuffle(rhs, rhs + n, rng);
 20 | 
 21 | 	mipp::reg r1 = mipp::load<T>(lhs);
 22 | 	mipp::reg r2 = mipp::load<T>(rhs);
 23 | 	mipp::reg r3 = mipp::andb<T>(r1, r2);
 24 | 	for (int i = 0; i < n; ++i)
 25 | 	{
 26 | 		T res = lhs[i] & rhs[i]; // scalar reference
 27 | 		REQUIRE(mipp::get<T>(r3, i) == res);
 28 | 	}
 29 | }
 30 | 
 31 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS)) // compiled out when either macro is defined
 32 | TEST_CASE("Binary and - mipp::reg", "[mipp::andb]")
 33 | {
 34 | #ifdef MIPP_64BIT
 35 | 	SECTION("datatype = int64_t") { test_reg_andb<int64_t>(); }
 36 | #endif // MIPP_64BIT
 37 | 	SECTION("datatype = int32_t") { test_reg_andb<int32_t>(); }
 38 | #ifdef MIPP_BW
 39 | 	SECTION("datatype = int16_t") { test_reg_andb<int16_t>(); }
 40 | 	SECTION("datatype = int8_t") { test_reg_andb<int8_t>(); }
 41 | #endif // MIPP_BW
 42 | }
 43 | #endif // !(MIPP_NO || MIPP_SVE_LS)
 44 | 
 45 | template <typename T>
 46 | void test_Reg_andb()
 47 | {
 48 | 	// Two copies of the ramp 0 .. N<T>()-1, shuffled one after the other with the same engine.
 49 | 	constexpr int n = mipp::N<T>();
 50 | 	T lhs[n], rhs[n];
 51 | 	std::iota(lhs, lhs + n, (T)0);
 52 | 	std::iota(rhs, rhs + n, (T)0);
 53 | 	std::mt19937 rng; // default-constructed, so every run shuffles identically
 54 | 	std::shuffle(lhs, lhs + n, rng);
 55 | 	std::shuffle(rhs, rhs + n, rng);
 56 | 
 57 | 	mipp::Reg<T> r1 = lhs;
 58 | 	mipp::Reg<T> r2 = rhs;
 59 | 	mipp::Reg<T> r3 = r1 & r2;
 60 | 	for (int i = 0; i < n; ++i)
 61 | 	{
 62 | 		T res = lhs[i] & rhs[i]; // scalar reference
 63 | 		REQUIRE(r3[i] == res);
 64 | 	}
 65 | }
 66 | 
 67 | #ifndef MIPP_SVE_LS // the whole test case is compiled out when MIPP_SVE_LS is defined
 68 | TEST_CASE("Binary and - mipp::Reg", "[mipp::andb]")
 69 | {
 70 | #ifdef MIPP_64BIT
 71 | 	SECTION("datatype = int64_t") { test_Reg_andb<int64_t>(); }
 72 | #endif // MIPP_64BIT
 73 | 	SECTION("datatype = int32_t") { test_Reg_andb<int32_t>(); }
 74 | #ifdef MIPP_BW
 75 | 	SECTION("datatype = int16_t") { test_Reg_andb<int16_t>(); }
 76 | 	SECTION("datatype = int8_t") { test_Reg_andb<int8_t>(); }
 77 | #endif // MIPP_BW
 78 | }
 79 | #endif // !MIPP_SVE_LS
 80 | 
 81 | template <typename T>
 82 | void test_msk_andb()
 83 | {
 84 | 	constexpr int N = mipp::N<T>();
 85 | 	bool lhs[N], rhs[N];
 86 | 	std::mt19937 rng;
 87 | 	std::uniform_int_distribution<uint16_t> coin(0, 1); // yields 0 or 1
 88 | 
 89 | 	for (int trial = 0; trial < 100; ++trial)
 90 | 	{
 91 | 		// One draw per element, lhs[i] before rhs[i].
 92 | 		for (int i = 0; i < N; ++i)
 93 | 		{
 94 | 			lhs[i] = coin(rng) != 0;
 95 | 			rhs[i] = coin(rng) != 0;
 96 | 		}
 97 | 		std::shuffle(lhs, lhs + N, rng);
 98 | 		std::shuffle(rhs, rhs + N, rng);
 99 | 
100 | 		mipp::msk m1 = mipp::set<N>(lhs);
101 | 		mipp::msk m2 = mipp::set<N>(rhs);
102 | 		mipp::msk m3 = mipp::andb<N>(m1, m2);
103 | 
104 | 		// Convert the mask to a register so each lane can be read back with mipp::get.
105 | 		mipp::reg r = mipp::toreg<N>(m3);
106 | 		for (int i = 0; i < N; ++i)
107 | 		{
108 | 			bool res = lhs[i] && rhs[i];
109 | 			// A true lane only has to read back as non-zero; a false lane must be exactly zero.
110 | 			if (res)
111 | 				REQUIRE(mipp::get<T>(r, i) != (T)0);
112 | 			else
113 | 				REQUIRE(mipp::get<T>(r, i) == (T)res);
114 | 		}
115 | 	}
116 | }
117 | 
118 | #if !defined(MIPP_NO)
119 | TEST_CASE("Binary and - mipp::msk", "[mipp::andb]")
120 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
121 | #ifdef MIPP_64BIT
122 | 	SECTION("datatype = int64_t") { test_msk_andb<int64_t>(); }
123 | #endif // MIPP_64BIT
124 | 	SECTION("datatype = int32_t") { test_msk_andb<int32_t>(); }
125 | #ifdef MIPP_BW
126 | 	SECTION("datatype = int16_t") { test_msk_andb<int16_t>(); }
127 | 	SECTION("datatype = int8_t") { test_msk_andb<int8_t>(); }
128 | #endif // MIPP_BW
129 | }
130 | #endif // !MIPP_NO
131 | 
132 | // Applies `&` to two mipp::Msk<N> built from random bools; every lane must equal the scalar result.
133 | template <typename T> void test_Msk_andb()
134 | {
135 | 	constexpr int N = mipp::N<T>();
136 | 	bool bits1[N], bits2[N];
137 | 	std::mt19937 rng;
138 | 	std::uniform_int_distribution<uint16_t> coin(0, 1);
139 | 
140 | 	for (int round = 0; round < 100; round++)
141 | 	{
142 | 		// per lane, bits1 is drawn before bits2; both arrays are then shuffled with the same engine
143 | 		for (int k = 0; k < N; k++)
144 | 		{
145 | 			bits1[k] = coin(rng) != 0;
146 | 			bits2[k] = coin(rng) != 0;
147 | 		}
148 | 		std::shuffle(bits1, bits1 + N, rng);
149 | 		std::shuffle(bits2, bits2 + N, rng);
150 | 
151 | 		mipp::Msk<N> lhs = bits1;
152 | 		mipp::Msk<N> rhs = bits2;
153 | 		mipp::Msk<N> m3 = lhs & rhs;
154 | 
155 | 		for (auto i = 0; i < N; i++)
156 | 		{
157 | 			bool res = bits1[i] & bits2[i];
158 | 			REQUIRE(m3[i] == res);
159 | 		}
160 | 	}
161 | }
162 | 
163 | TEST_CASE("Binary and - mipp::Msk", "[mipp::andb]")
164 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
165 | #ifdef MIPP_64BIT
166 | 	SECTION("datatype = int64_t") { test_Msk_andb<int64_t>(); }
167 | #endif // MIPP_64BIT
168 | 	SECTION("datatype = int32_t") { test_Msk_andb<int32_t>(); }
169 | #ifdef MIPP_BW
170 | 	SECTION("datatype = int16_t") { test_Msk_andb<int16_t>(); }
171 | 	SECTION("datatype = int8_t") { test_Msk_andb<int8_t>(); }
172 | #endif // MIPP_BW
173 | }
174 | 


--------------------------------------------------------------------------------
/tests/src/bitwise_operations/andnb.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | // Runs mipp::andnb<T> on two loaded registers; every lane must equal the scalar `~a & b`.
 10 | template <typename T> void test_reg_andnb()
 11 | {
 12 | 	constexpr int n_lanes = mipp::N<T>();
 13 | 	T lhs[n_lanes], rhs[n_lanes];
 14 | 	std::iota(lhs, lhs + n_lanes, (T)0);
 15 | 	std::iota(rhs, rhs + n_lanes, (T)0);
 16 | 	std::mt19937 rng; // default-constructed engine, shared by both shuffles
 17 | 	std::shuffle(lhs, lhs + n_lanes, rng);
 18 | 	std::shuffle(rhs, rhs + n_lanes, rng);
 19 | 
 20 | 	mipp::reg a = mipp::load<T>(lhs);
 21 | 	mipp::reg b = mipp::load<T>(rhs);
 22 | 	mipp::reg r3 = mipp::andnb<T>(a, b);
 23 | 
 24 | 	for (int i = 0; i < n_lanes; i++)
 25 | 	{
 26 | 		T res = ~lhs[i] & rhs[i];
 27 | 		REQUIRE(mipp::get<T>(r3, i) == res);
 28 | 	}
 29 | }
 30 | 
 31 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS))
 32 | TEST_CASE("Binary not and - mipp::reg", "[mipp::andnb]")
 33 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
 34 | #ifdef MIPP_64BIT
 35 | 	SECTION("datatype = int64_t") { test_reg_andnb<int64_t>(); }
 36 | #endif // MIPP_64BIT
 37 | 	SECTION("datatype = int32_t") { test_reg_andnb<int32_t>(); }
 38 | #ifdef MIPP_BW
 39 | 	SECTION("datatype = int16_t") { test_reg_andnb<int16_t>(); }
 40 | 	SECTION("datatype = int8_t") { test_reg_andnb<int8_t>(); }
 41 | #endif // MIPP_BW
 42 | }
 43 | #endif // !(MIPP_NO || MIPP_SVE_LS)
 44 | 
 45 | // Calls mipp::andnb on two mipp::Reg<T>; every lane must equal the scalar `~a & b`.
 46 | template <typename T> void test_Reg_andnb()
 47 | {
 48 | 	constexpr int n_lanes = mipp::N<T>();
 49 | 	T lhs[n_lanes], rhs[n_lanes];
 50 | 	std::iota(lhs, lhs + n_lanes, (T)0);
 51 | 	std::iota(rhs, rhs + n_lanes, (T)0);
 52 | 	std::mt19937 rng; // default-constructed engine, shared by both shuffles
 53 | 	std::shuffle(lhs, lhs + n_lanes, rng);
 54 | 	std::shuffle(rhs, rhs + n_lanes, rng);
 55 | 
 56 | 	mipp::Reg<T> a = lhs;
 57 | 	mipp::Reg<T> b = rhs;
 58 | 	mipp::Reg<T> r3 = mipp::andnb(a, b);
 59 | 
 60 | 	for (int i = 0; i < n_lanes; i++)
 61 | 	{
 62 | 		T res = ~lhs[i] & rhs[i];
 63 | 		REQUIRE(r3[i] == res);
 64 | 	}
 65 | }
 66 | 
 67 | #ifndef MIPP_SVE_LS
 68 | TEST_CASE("Binary not and - mipp::Reg", "[mipp::andnb]")
 69 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
 70 | #ifdef MIPP_64BIT
 71 | 	SECTION("datatype = int64_t") { test_Reg_andnb<int64_t>(); }
 72 | #endif // MIPP_64BIT
 73 | 	SECTION("datatype = int32_t") { test_Reg_andnb<int32_t>(); }
 74 | #ifdef MIPP_BW
 75 | 	SECTION("datatype = int16_t") { test_Reg_andnb<int16_t>(); }
 76 | 	SECTION("datatype = int8_t") { test_Reg_andnb<int8_t>(); }
 77 | #endif // MIPP_BW
 78 | }
 79 | #endif // !MIPP_SVE_LS
 80 | 
 81 | // Runs mipp::andnb<N> on raw masks built from random bools and checks the mask through mipp::toreg<N>.
 82 | template <typename T> void test_msk_andnb()
 83 | {
 84 | 	constexpr int N = mipp::N<T>();
 85 | 	bool bits1[N], bits2[N];
 86 | 	std::mt19937 rng;
 87 | 	std::uniform_int_distribution<uint16_t> coin(0, 1);
 88 | 
 89 | 	for (int round = 0; round < 100; round++)
 90 | 	{
 91 | 		// per lane, bits1 is drawn before bits2; both arrays are then shuffled with the same engine
 92 | 		for (int k = 0; k < N; k++)
 93 | 		{
 94 | 			bits1[k] = coin(rng) != 0;
 95 | 			bits2[k] = coin(rng) != 0;
 96 | 		}
 97 | 		std::shuffle(bits1, bits1 + N, rng);
 98 | 		std::shuffle(bits2, bits2 + N, rng);
 99 | 
100 | 		mipp::msk lhs = mipp::set<N>(bits1);
101 | 		mipp::msk rhs = mipp::set<N>(bits2);
102 | 		mipp::msk combined = mipp::andnb<N>(lhs, rhs);
103 | 		mipp::reg r = mipp::toreg<N>(combined);
104 | 
105 | 		// an expected-true lane only has to be non-zero; an expected-false lane must be exactly zero
106 | 		for (auto i = 0; i < N; i++)
107 | 		{
108 | 			const bool res = !bits1[i] & bits2[i];
109 | 
110 | 			if (!res)
111 | 				REQUIRE(mipp::get<T>(r, i) == (T)res);
112 | 			else
113 | 				REQUIRE(mipp::get<T>(r, i) != (T)0);
114 | 		}
115 | 	}
116 | }
117 | 
118 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS))
119 | TEST_CASE("Binary not and - mipp::msk", "[mipp::andnb]")
120 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
121 | #ifdef MIPP_64BIT
122 | 	SECTION("datatype = int64_t") { test_msk_andnb<int64_t>(); }
123 | #endif // MIPP_64BIT
124 | 	SECTION("datatype = int32_t") { test_msk_andnb<int32_t>(); }
125 | #ifdef MIPP_BW
126 | 	SECTION("datatype = int16_t") { test_msk_andnb<int16_t>(); }
127 | 	SECTION("datatype = int8_t") { test_msk_andnb<int8_t>(); }
128 | #endif // MIPP_BW
129 | }
130 | #endif // !(MIPP_NO || MIPP_SVE_LS)
131 | 
132 | // Calls mipp::andnb on two mipp::Msk<N> built from random bools; every lane must equal `!a & b`.
133 | template <typename T> void test_Msk_andnb()
134 | {
135 | 	constexpr int N = mipp::N<T>();
136 | 	bool bits1[N], bits2[N];
137 | 	std::mt19937 rng;
138 | 	std::uniform_int_distribution<uint16_t> coin(0, 1);
139 | 
140 | 	for (int round = 0; round < 100; round++)
141 | 	{
142 | 		// per lane, bits1 is drawn before bits2; both arrays are then shuffled with the same engine
143 | 		for (int k = 0; k < N; k++)
144 | 		{
145 | 			bits1[k] = coin(rng) != 0;
146 | 			bits2[k] = coin(rng) != 0;
147 | 		}
148 | 		std::shuffle(bits1, bits1 + N, rng);
149 | 		std::shuffle(bits2, bits2 + N, rng);
150 | 
151 | 		mipp::Msk<N> lhs = bits1;
152 | 		mipp::Msk<N> rhs = bits2;
153 | 		mipp::Msk<N> m3 = mipp::andnb(lhs, rhs);
154 | 
155 | 		for (auto i = 0; i < N; i++)
156 | 		{
157 | 			bool res = !bits1[i] & bits2[i];
158 | 			REQUIRE(m3[i] == res);
159 | 		}
160 | 	}
161 | }
162 | 
163 | #ifndef MIPP_SVE_LS
164 | TEST_CASE("Binary not and - mipp::Msk", "[mipp::andnb]")
165 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
166 | #ifdef MIPP_64BIT
167 | 	SECTION("datatype = int64_t") { test_Msk_andnb<int64_t>(); }
168 | #endif // MIPP_64BIT
169 | 	SECTION("datatype = int32_t") { test_Msk_andnb<int32_t>(); }
170 | #ifdef MIPP_BW
171 | 	SECTION("datatype = int16_t") { test_Msk_andnb<int16_t>(); }
172 | 	SECTION("datatype = int8_t") { test_Msk_andnb<int8_t>(); }
173 | #endif // MIPP_BW
174 | }
175 | #endif // !MIPP_SVE_LS
176 | 


--------------------------------------------------------------------------------
/tests/src/bitwise_operations/lshiftr.cpp:
--------------------------------------------------------------------------------
 1 | #include <exception>
 2 | #include <algorithm>
 3 | #include <numeric>
 4 | #include <random>
 5 | #include <cmath>
 6 | #include <mipp.h>
 7 | #include <catch.hpp>
 8 | 
 9 | // Runs mipp::lshiftr<T> with per-lane shift counts; every lane must equal the scalar `value << shift`.
10 | template <typename T> void test_reg_lshiftr()
11 | {
12 | 	constexpr int n_lanes = mipp::N<T>();
13 | 	T values[n_lanes], shifts[n_lanes];
14 | 	std::iota(values, values + n_lanes, (T)0);
15 | 	std::iota(shifts, shifts + n_lanes, (T)0);
16 | 
17 | 	std::mt19937 rng; // default-constructed engine, shared by both shuffles
18 | 	std::shuffle(values, values + n_lanes, rng);
19 | 	std::shuffle(shifts, shifts + n_lanes, rng);
20 | 
21 | 	mipp::reg a = mipp::load<T>(values);
22 | 	mipp::reg b = mipp::load<T>(shifts);
23 | 	mipp::reg r3 = mipp::lshiftr<T>(a, b);
24 | 
25 | 	for (int i = 0; i < n_lanes; i++)
26 | 	{
27 | 		T res = values[i] << shifts[i];
28 | 		REQUIRE(mipp::get<T>(r3, i) == res);
29 | 	}
30 | }
31 | 
32 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS))
33 | TEST_CASE("Binary left shift (register) - mipp::reg", "[mipp::lshiftr]")
34 | { // every section is dropped when MIPP_AVX is defined with MIPP_INSTR_VERSION < 2
35 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
36 | #ifdef MIPP_64BIT
37 | 	SECTION("datatype = int64_t") { test_reg_lshiftr<int64_t>(); }
38 | #endif // MIPP_64BIT
39 | 	SECTION("datatype = int32_t") { test_reg_lshiftr<int32_t>(); }
40 | #ifdef MIPP_BW
41 | #ifndef MIPP_AVX
42 | 	SECTION("datatype = int16_t") { test_reg_lshiftr<int16_t>(); }
43 | #ifndef MIPP_AVX512
44 | 	SECTION("datatype = int8_t") { test_reg_lshiftr<int8_t>(); }
45 | #endif // !MIPP_AVX512
46 | #endif // !MIPP_AVX
47 | #endif // MIPP_BW
48 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
49 | }
50 | #endif // !(MIPP_NO || MIPP_SVE_LS)
51 | 
52 | // Applies `<<` between two mipp::Reg<T>; every lane must equal the scalar `value << shift`.
53 | template <typename T> void test_Reg_lshiftr()
54 | {
55 | 	constexpr int n_lanes = mipp::N<T>();
56 | 	T values[n_lanes], shifts[n_lanes];
57 | 	std::iota(values, values + n_lanes, (T)0);
58 | 	std::iota(shifts, shifts + n_lanes, (T)0);
59 | 
60 | 	std::mt19937 rng; // default-constructed engine, shared by both shuffles
61 | 	std::shuffle(values, values + n_lanes, rng);
62 | 	std::shuffle(shifts, shifts + n_lanes, rng);
63 | 
64 | 	mipp::Reg<T> a = values;
65 | 	mipp::Reg<T> b = shifts;
66 | 	mipp::Reg<T> r3 = a << b;
67 | 
68 | 	for (int i = 0; i < n_lanes; i++)
69 | 	{
70 | 		T res = values[i] << shifts[i];
71 | 		REQUIRE(r3[i] == res);
72 | 	}
73 | }
74 | 
75 | #ifndef MIPP_SVE_LS
76 | TEST_CASE("Binary left shift (register) - mipp::Reg", "[mipp::lshiftr]")
77 | { // every section is dropped when MIPP_AVX is defined with MIPP_INSTR_VERSION < 2
78 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
79 | #ifdef MIPP_64BIT
80 | 	SECTION("datatype = int64_t") { test_Reg_lshiftr<int64_t>(); }
81 | #endif // MIPP_64BIT
82 | 	SECTION("datatype = int32_t") { test_Reg_lshiftr<int32_t>(); }
83 | #ifdef MIPP_BW
84 | #ifndef MIPP_AVX
85 | 	SECTION("datatype = int16_t") { test_Reg_lshiftr<int16_t>(); }
86 | #ifndef MIPP_AVX512
87 | 	SECTION("datatype = int8_t") { test_Reg_lshiftr<int8_t>(); }
88 | #endif // !MIPP_AVX512
89 | #endif // !MIPP_AVX
90 | #endif // MIPP_BW
91 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
92 | }
93 | #endif // !MIPP_SVE_LS


--------------------------------------------------------------------------------
/tests/src/bitwise_operations/notb.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | // Runs mipp::notb<T> on a loaded register; every lane must equal the scalar `~value`.
 10 | template <typename T> void test_reg_notb()
 11 | {
 12 | 	constexpr int n_lanes = mipp::N<T>();
 13 | 	T values[n_lanes];
 14 | 	std::iota(values, values + n_lanes, (T)0);
 15 | 	std::mt19937 rng; // default-constructed engine
 16 | 	std::shuffle(values, values + n_lanes, rng);
 17 | 
 18 | 	mipp::reg src = mipp::load<T>(values);
 19 | 	mipp::reg r2 = mipp::notb<T>(src);
 20 | 
 21 | 	for (int i = 0; i < n_lanes; i++)
 22 | 	{
 23 | 		T res = ~values[i];
 24 | 		REQUIRE(mipp::get<T>(r2, i) == res);
 25 | 	}
 26 | }
 27 | 
 28 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS))
 29 | TEST_CASE("Binary not - mipp::reg", "[mipp::notb]")
 30 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
 31 | #ifdef MIPP_64BIT
 32 | 	SECTION("datatype = int64_t") { test_reg_notb<int64_t>(); }
 33 | #endif // MIPP_64BIT
 34 | 	SECTION("datatype = int32_t") { test_reg_notb<int32_t>(); }
 35 | #ifdef MIPP_BW
 36 | 	SECTION("datatype = int16_t") { test_reg_notb<int16_t>(); }
 37 | 	SECTION("datatype = int8_t") { test_reg_notb<int8_t>(); }
 38 | #endif // MIPP_BW
 39 | }
 40 | #endif // !(MIPP_NO || MIPP_SVE_LS)
 41 | 
 42 | // Applies `~` to a mipp::Reg<T>; every lane must equal the scalar `~value`.
 43 | template <typename T> void test_Reg_notb()
 44 | {
 45 | 	constexpr int n_lanes = mipp::N<T>();
 46 | 	T values[n_lanes];
 47 | 	std::iota(values, values + n_lanes, (T)0);
 48 | 	std::mt19937 rng; // default-constructed engine
 49 | 	std::shuffle(values, values + n_lanes, rng);
 50 | 
 51 | 	mipp::Reg<T> src = values;
 52 | 	mipp::Reg<T> r2 = ~src;
 53 | 
 54 | 	for (int i = 0; i < n_lanes; i++)
 55 | 	{
 56 | 		T res = ~values[i];
 57 | 		REQUIRE(r2[i] == res);
 58 | 	}
 59 | }
 60 | 
 61 | #ifndef MIPP_SVE_LS
 62 | TEST_CASE("Binary not - mipp::Reg", "[mipp::notb]")
 63 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
 64 | #ifdef MIPP_64BIT
 65 | 	SECTION("datatype = int64_t") { test_Reg_notb<int64_t>(); }
 66 | #endif // MIPP_64BIT
 67 | 	SECTION("datatype = int32_t") { test_Reg_notb<int32_t>(); }
 68 | #ifdef MIPP_BW
 69 | 	SECTION("datatype = int16_t") { test_Reg_notb<int16_t>(); }
 70 | 	SECTION("datatype = int8_t") { test_Reg_notb<int8_t>(); }
 71 | #endif // MIPP_BW
 72 | }
 73 | #endif // !MIPP_SVE_LS
 74 | 
 75 | // Runs mipp::notb<N> on a raw mask built from random bools and checks it through mipp::toreg<N>.
 76 | template <typename T> void test_msk_notb()
 77 | {
 78 | 	constexpr int N = mipp::N<T>();
 79 | 	bool bits[N];
 80 | 	std::mt19937 rng;
 81 | 	std::uniform_int_distribution<uint16_t> coin(0, 1);
 82 | 
 83 | 	for (int round = 0; round < 100; round++)
 84 | 	{
 85 | 		for (int k = 0; k < N; k++)
 86 | 			bits[k] = coin(rng) != 0;
 87 | 		std::shuffle(bits, bits + N, rng);
 88 | 
 89 | 		mipp::msk src = mipp::set<N>(bits);
 90 | 		mipp::msk flipped = mipp::notb<N>(src);
 91 | 		mipp::reg r = mipp::toreg<N>(flipped);
 92 | 
 93 | 		// an expected-true lane only has to be non-zero in the register;
 94 | 		// an expected-false lane must be exactly zero
 95 | 		for (auto i = 0; i < N; i++)
 96 | 		{
 97 | 			const bool res = !bits[i];
 98 | 
 99 | 			if (!res)
100 | 				REQUIRE(mipp::get<T>(r, i) == (T)res);
101 | 			else
102 | 				REQUIRE(mipp::get<T>(r, i) != (T)0);
103 | 		}
104 | 	}
105 | }
106 | 
107 | #if !defined(MIPP_NO)
108 | TEST_CASE("Binary not - mipp::msk", "[mipp::notb]")
109 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
110 | #ifdef MIPP_64BIT
111 | 	SECTION("datatype = int64_t") { test_msk_notb<int64_t>(); }
112 | #endif // MIPP_64BIT
113 | 	SECTION("datatype = int32_t") { test_msk_notb<int32_t>(); }
114 | #ifdef MIPP_BW
115 | 	SECTION("datatype = int16_t") { test_msk_notb<int16_t>(); }
116 | 	SECTION("datatype = int8_t") { test_msk_notb<int8_t>(); }
117 | #endif // MIPP_BW
118 | }
119 | #endif // !MIPP_NO
120 | 
121 | // Applies `~` to a mipp::Msk<N> built from random bools; every lane must equal the scalar `!bit`.
122 | template <typename T> void test_Msk_notb()
123 | {
124 | 	constexpr int N = mipp::N<T>();
125 | 	bool bits[N];
126 | 	std::mt19937 rng;
127 | 	std::uniform_int_distribution<uint16_t> coin(0, 1);
128 | 
129 | 	for (int round = 0; round < 100; round++)
130 | 	{
131 | 		for (int k = 0; k < N; k++)
132 | 			bits[k] = coin(rng) != 0;
133 | 		std::shuffle(bits, bits + N, rng);
134 | 
135 | 		// the mask is rebuilt from the freshly drawn bools on every round
136 | 		mipp::Msk<N> src = bits;
137 | 		mipp::Msk<N> m2 = ~src;
138 | 
139 | 		for (auto i = 0; i < N; i++)
140 | 		{
141 | 			bool res = !bits[i];
142 | 			REQUIRE(m2[i] == res);
143 | 		}
144 | 	}
145 | }
146 | 
147 | TEST_CASE("Binary not - mipp::Msk", "[mipp::notb]")
148 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
149 | #ifdef MIPP_64BIT
150 | 	SECTION("datatype = int64_t") { test_Msk_notb<int64_t>(); }
151 | #endif // MIPP_64BIT
152 | 	SECTION("datatype = int32_t") { test_Msk_notb<int32_t>(); }
153 | #ifdef MIPP_BW
154 | 	SECTION("datatype = int16_t") { test_Msk_notb<int16_t>(); }
155 | 	SECTION("datatype = int8_t") { test_Msk_notb<int8_t>(); }
156 | #endif // MIPP_BW
157 | }


--------------------------------------------------------------------------------
/tests/src/bitwise_operations/orb.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | // Runs mipp::orb<T> on two loaded registers; every lane must equal the scalar `a | b`.
 10 | template <typename T> void test_reg_orb()
 11 | {
 12 | 	constexpr int n_lanes = mipp::N<T>();
 13 | 	T lhs[n_lanes], rhs[n_lanes];
 14 | 	std::iota(lhs, lhs + n_lanes, (T)0);
 15 | 	std::iota(rhs, rhs + n_lanes, (T)0);
 16 | 	std::mt19937 rng; // default-constructed engine, shared by both shuffles
 17 | 	std::shuffle(lhs, lhs + n_lanes, rng);
 18 | 	std::shuffle(rhs, rhs + n_lanes, rng);
 19 | 
 20 | 	mipp::reg a = mipp::load<T>(lhs);
 21 | 	mipp::reg b = mipp::load<T>(rhs);
 22 | 	mipp::reg r3 = mipp::orb<T>(a, b);
 23 | 
 24 | 	for (int i = 0; i < n_lanes; i++)
 25 | 	{
 26 | 		T res = lhs[i] | rhs[i];
 27 | 		REQUIRE(mipp::get<T>(r3, i) == res);
 28 | 	}
 29 | }
 30 | 
 31 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS))
 32 | TEST_CASE("Binary or - mipp::reg", "[mipp::orb]")
 33 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
 34 | #ifdef MIPP_64BIT
 35 | 	SECTION("datatype = int64_t") { test_reg_orb<int64_t>(); }
 36 | #endif // MIPP_64BIT
 37 | 	SECTION("datatype = int32_t") { test_reg_orb<int32_t>(); }
 38 | #ifdef MIPP_BW
 39 | 	SECTION("datatype = int16_t") { test_reg_orb<int16_t>(); }
 40 | 	SECTION("datatype = int8_t") { test_reg_orb<int8_t>(); }
 41 | #endif // MIPP_BW
 42 | }
 43 | #endif // !(MIPP_NO || MIPP_SVE_LS)
 44 | 
 45 | // Combines two shuffled 0..N-1 arrays with `|` on mipp::Reg<T>; every lane must equal the scalar `|`.
 46 | template <typename T> void test_Reg_orb()
 47 | {
 48 | 	constexpr int n_lanes = mipp::N<T>();
 49 | 	T lhs[n_lanes], rhs[n_lanes];
 50 | 	std::iota(lhs, lhs + n_lanes, (T)0);
 51 | 	std::iota(rhs, rhs + n_lanes, (T)0);
 52 | 	std::mt19937 rng; // default-constructed engine, shared by both shuffles
 53 | 	std::shuffle(lhs, lhs + n_lanes, rng);
 54 | 	std::shuffle(rhs, rhs + n_lanes, rng);
 55 | 
 56 | 	mipp::Reg<T> a = lhs;
 57 | 	mipp::Reg<T> b = rhs;
 58 | 	mipp::Reg<T> r3 = a | b;
 59 | 
 60 | 	for (int i = 0; i < n_lanes; i++)
 61 | 	{
 62 | 		T res = lhs[i] | rhs[i];
 63 | 		REQUIRE(r3[i] == res);
 64 | 	}
 65 | }
 66 | 
 67 | #ifndef MIPP_SVE_LS
 68 | TEST_CASE("Binary or - mipp::Reg", "[mipp::orb]")
 69 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
 70 | #ifdef MIPP_64BIT
 71 | 	SECTION("datatype = int64_t") { test_Reg_orb<int64_t>(); }
 72 | #endif // MIPP_64BIT
 73 | 	SECTION("datatype = int32_t") { test_Reg_orb<int32_t>(); }
 74 | #ifdef MIPP_BW
 75 | 	SECTION("datatype = int16_t") { test_Reg_orb<int16_t>(); }
 76 | 	SECTION("datatype = int8_t") { test_Reg_orb<int8_t>(); }
 77 | #endif // MIPP_BW
 78 | }
 79 | #endif // !MIPP_SVE_LS
 80 | 
 81 | // Runs mipp::orb<N> on raw masks built from random bools and checks the mask through mipp::toreg<N>.
 82 | template <typename T> void test_msk_orb()
 83 | {
 84 | 	constexpr int N = mipp::N<T>();
 85 | 	bool bits1[N], bits2[N];
 86 | 	std::mt19937 rng;
 87 | 	std::uniform_int_distribution<uint16_t> coin(0, 1);
 88 | 
 89 | 	for (int round = 0; round < 100; round++)
 90 | 	{
 91 | 		// per lane, bits1 is drawn before bits2; both arrays are then shuffled with the same engine
 92 | 		for (int k = 0; k < N; k++)
 93 | 		{
 94 | 			bits1[k] = coin(rng) != 0;
 95 | 			bits2[k] = coin(rng) != 0;
 96 | 		}
 97 | 		std::shuffle(bits1, bits1 + N, rng);
 98 | 		std::shuffle(bits2, bits2 + N, rng);
 99 | 
100 | 		mipp::msk lhs = mipp::set<N>(bits1);
101 | 		mipp::msk rhs = mipp::set<N>(bits2);
102 | 		mipp::msk either = mipp::orb<N>(lhs, rhs);
103 | 		mipp::reg r = mipp::toreg<N>(either);
104 | 
105 | 		// an expected-true lane only has to be non-zero; an expected-false lane must be exactly zero
106 | 		for (auto i = 0; i < N; i++)
107 | 		{
108 | 			const bool res = bits1[i] | bits2[i];
109 | 
110 | 			if (!res)
111 | 				REQUIRE(mipp::get<T>(r, i) == (T)res);
112 | 			else
113 | 				REQUIRE(mipp::get<T>(r, i) != (T)0);
114 | 		}
115 | 	}
116 | }
117 | 
118 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS))
119 | TEST_CASE("Binary or - mipp::msk", "[mipp::orb]")
120 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
121 | #ifdef MIPP_64BIT
122 | 	SECTION("datatype = int64_t") { test_msk_orb<int64_t>(); }
123 | #endif // MIPP_64BIT
124 | 	SECTION("datatype = int32_t") { test_msk_orb<int32_t>(); }
125 | #ifdef MIPP_BW
126 | 	SECTION("datatype = int16_t") { test_msk_orb<int16_t>(); }
127 | 	SECTION("datatype = int8_t") { test_msk_orb<int8_t>(); }
128 | #endif // MIPP_BW
129 | }
130 | #endif // !(MIPP_NO || MIPP_SVE_LS)
131 | 
132 | // Applies `|` to two mipp::Msk<N> built from random bools; every lane must equal the scalar result.
133 | template <typename T> void test_Msk_orb()
134 | {
135 | 	constexpr int N = mipp::N<T>();
136 | 	bool bits1[N], bits2[N];
137 | 	std::mt19937 rng;
138 | 	std::uniform_int_distribution<uint16_t> coin(0, 1);
139 | 
140 | 	for (int round = 0; round < 100; round++)
141 | 	{
142 | 		// per lane, bits1 is drawn before bits2; both arrays are then shuffled with the same engine
143 | 		for (int k = 0; k < N; k++)
144 | 		{
145 | 			bits1[k] = coin(rng) != 0;
146 | 			bits2[k] = coin(rng) != 0;
147 | 		}
148 | 		std::shuffle(bits1, bits1 + N, rng);
149 | 		std::shuffle(bits2, bits2 + N, rng);
150 | 
151 | 		mipp::Msk<N> lhs = bits1;
152 | 		mipp::Msk<N> rhs = bits2;
153 | 		mipp::Msk<N> m3 = lhs | rhs;
154 | 
155 | 		for (auto i = 0; i < N; i++)
156 | 		{
157 | 			bool res = bits1[i] | bits2[i];
158 | 			REQUIRE(m3[i] == res);
159 | 		}
160 | 	}
161 | }
162 | 
163 | #ifndef MIPP_SVE_LS
164 | TEST_CASE("Binary or - mipp::Msk", "[mipp::orb]")
165 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
166 | #ifdef MIPP_64BIT
167 | 	SECTION("datatype = int64_t") { test_Msk_orb<int64_t>(); }
168 | #endif // MIPP_64BIT
169 | 	SECTION("datatype = int32_t") { test_Msk_orb<int32_t>(); }
170 | #ifdef MIPP_BW
171 | 	SECTION("datatype = int16_t") { test_Msk_orb<int16_t>(); }
172 | 	SECTION("datatype = int8_t") { test_Msk_orb<int8_t>(); }
173 | #endif // MIPP_BW
174 | }
175 | #endif // !MIPP_SVE_LS
176 | 


--------------------------------------------------------------------------------
/tests/src/bitwise_operations/rshiftr.cpp:
--------------------------------------------------------------------------------
 1 | #include <exception>
 2 | #include <algorithm>
 3 | #include <numeric>
 4 | #include <random>
 5 | #include <cmath>
 6 | #include <mipp.h>
 7 | #include <catch.hpp>
 8 | 
 9 | // Runs mipp::rshiftr<T> with per-lane shift counts; every lane must equal the scalar `value >> shift`.
10 | template <typename T> void test_reg_rshiftr()
11 | {
12 | 	constexpr int n_lanes = mipp::N<T>();
13 | 	T values[n_lanes], shifts[n_lanes];
14 | 	std::iota(values, values + n_lanes, (T)0);
15 | 	std::iota(shifts, shifts + n_lanes, (T)0);
16 | 
17 | 	std::mt19937 rng; // default-constructed engine, shared by both shuffles
18 | 	std::shuffle(values, values + n_lanes, rng);
19 | 	std::shuffle(shifts, shifts + n_lanes, rng);
20 | 
21 | 	mipp::reg a = mipp::load<T>(values);
22 | 	mipp::reg b = mipp::load<T>(shifts);
23 | 	mipp::reg r3 = mipp::rshiftr<T>(a, b);
24 | 
25 | 	for (int i = 0; i < n_lanes; i++)
26 | 	{
27 | 		T res = values[i] >> shifts[i];
28 | 		REQUIRE(mipp::get<T>(r3, i) == res);
29 | 	}
30 | }
31 | 
32 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS))
33 | TEST_CASE("Binary right shift (register) - mipp::reg", "[mipp::rshiftr]")
34 | { // every section is dropped when MIPP_AVX is defined with MIPP_INSTR_VERSION < 2
35 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
36 | #ifdef MIPP_64BIT
37 | 	SECTION("datatype = int64_t") { test_reg_rshiftr<int64_t>(); }
38 | #endif // MIPP_64BIT
39 | 	SECTION("datatype = int32_t") { test_reg_rshiftr<int32_t>(); }
40 | #ifdef MIPP_BW
41 | #ifndef MIPP_AVX
42 | 	SECTION("datatype = int16_t") { test_reg_rshiftr<int16_t>(); }
43 | #ifndef MIPP_AVX512
44 | 	SECTION("datatype = int8_t") { test_reg_rshiftr<int8_t>(); }
45 | #endif // !MIPP_AVX512
46 | #endif // !MIPP_AVX
47 | #endif // MIPP_BW
48 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
49 | }
50 | #endif // !(MIPP_NO || MIPP_SVE_LS)
51 | 
52 | // Applies `>>` between two mipp::Reg<T>; every lane must equal the scalar `value >> shift`.
53 | template <typename T> void test_Reg_rshiftr()
54 | {
55 | 	constexpr int n_lanes = mipp::N<T>();
56 | 	T values[n_lanes], shifts[n_lanes];
57 | 	std::iota(values, values + n_lanes, (T)0);
58 | 	std::iota(shifts, shifts + n_lanes, (T)0);
59 | 
60 | 	std::mt19937 rng; // default-constructed engine, shared by both shuffles
61 | 	std::shuffle(values, values + n_lanes, rng);
62 | 	std::shuffle(shifts, shifts + n_lanes, rng);
63 | 
64 | 	mipp::Reg<T> a = values;
65 | 	mipp::Reg<T> b = shifts;
66 | 	mipp::Reg<T> r3 = a >> b;
67 | 
68 | 	for (int i = 0; i < n_lanes; i++)
69 | 	{
70 | 		T res = values[i] >> shifts[i];
71 | 		REQUIRE(r3[i] == res);
72 | 	}
73 | }
74 | 
75 | #ifndef MIPP_SVE_LS
76 | TEST_CASE("Binary right shift (register) - mipp::Reg", "[mipp::rshiftr]")
77 | { // every section is dropped when MIPP_AVX is defined with MIPP_INSTR_VERSION < 2
78 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
79 | #ifdef MIPP_64BIT
80 | 	SECTION("datatype = int64_t") { test_Reg_rshiftr<int64_t>(); }
81 | #endif // MIPP_64BIT
82 | 	SECTION("datatype = int32_t") { test_Reg_rshiftr<int32_t>(); }
83 | #ifdef MIPP_BW
84 | #ifndef MIPP_AVX
85 | 	SECTION("datatype = int16_t") { test_Reg_rshiftr<int16_t>(); }
86 | #ifndef MIPP_AVX512
87 | 	SECTION("datatype = int8_t") { test_Reg_rshiftr<int8_t>(); }
88 | #endif // !MIPP_AVX512
89 | #endif // !MIPP_AVX
90 | #endif // MIPP_BW
91 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
92 | }
93 | #endif // !MIPP_SVE_LS
94 | 


--------------------------------------------------------------------------------
/tests/src/bitwise_operations/xorb.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | // Runs mipp::xorb<T> on two loaded registers; every lane must equal the scalar `a ^ b`.
 10 | template <typename T> void test_reg_xorb()
 11 | {
 12 | 	constexpr int n_lanes = mipp::N<T>();
 13 | 	T lhs[n_lanes], rhs[n_lanes];
 14 | 	std::iota(lhs, lhs + n_lanes, (T)0);
 15 | 	std::iota(rhs, rhs + n_lanes, (T)0);
 16 | 
 17 | 	std::mt19937 rng; // default-constructed engine, shared by both shuffles
 18 | 	std::shuffle(lhs, lhs + n_lanes, rng);
 19 | 	std::shuffle(rhs, rhs + n_lanes, rng);
 20 | 
 21 | 	mipp::reg a = mipp::load<T>(lhs);
 22 | 	mipp::reg b = mipp::load<T>(rhs);
 23 | 	mipp::reg r3 = mipp::xorb<T>(a, b);
 24 | 
 25 | 	for (int i = 0; i < n_lanes; i++)
 26 | 	{
 27 | 		T res = lhs[i] ^ rhs[i];
 28 | 		REQUIRE(mipp::get<T>(r3, i) == res);
 29 | 	}
 30 | }
 31 | 
 32 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS))
 33 | TEST_CASE("Binary xor - mipp::reg", "[mipp::xorb]")
 34 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
 35 | #ifdef MIPP_64BIT
 36 | 	SECTION("datatype = int64_t") { test_reg_xorb<int64_t>(); }
 37 | #endif // MIPP_64BIT
 38 | 	SECTION("datatype = int32_t") { test_reg_xorb<int32_t>(); }
 39 | #ifdef MIPP_BW
 40 | 	SECTION("datatype = int16_t") { test_reg_xorb<int16_t>(); }
 41 | 	SECTION("datatype = int8_t") { test_reg_xorb<int8_t>(); }
 42 | #endif // MIPP_BW
 43 | }
 44 | #endif // !(MIPP_NO || MIPP_SVE_LS)
 45 | 
 46 | // Combines two shuffled 0..N-1 arrays with `^` on mipp::Reg<T>; every lane must equal the scalar `^`.
 47 | template <typename T> void test_Reg_xorb()
 48 | {
 49 | 	constexpr int n_lanes = mipp::N<T>();
 50 | 	T lhs[n_lanes], rhs[n_lanes];
 51 | 	std::iota(lhs, lhs + n_lanes, (T)0);
 52 | 	std::iota(rhs, rhs + n_lanes, (T)0);
 53 | 	std::mt19937 rng; // default-constructed engine, shared by both shuffles
 54 | 	std::shuffle(lhs, lhs + n_lanes, rng);
 55 | 	std::shuffle(rhs, rhs + n_lanes, rng);
 56 | 
 57 | 	mipp::Reg<T> a = lhs;
 58 | 	mipp::Reg<T> b = rhs;
 59 | 	mipp::Reg<T> r3 = a ^ b;
 60 | 
 61 | 	for (int i = 0; i < n_lanes; i++)
 62 | 	{
 63 | 		T res = lhs[i] ^ rhs[i];
 64 | 		REQUIRE(r3[i] == res);
 65 | 	}
 66 | }
 67 | 
 68 | #ifndef MIPP_SVE_LS
 69 | TEST_CASE("Binary xor - mipp::Reg", "[mipp::xorb]")
 70 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
 71 | #ifdef MIPP_64BIT
 72 | 	SECTION("datatype = int64_t") { test_Reg_xorb<int64_t>(); }
 73 | #endif // MIPP_64BIT
 74 | 	SECTION("datatype = int32_t") { test_Reg_xorb<int32_t>(); }
 75 | #ifdef MIPP_BW
 76 | 	SECTION("datatype = int16_t") { test_Reg_xorb<int16_t>(); }
 77 | 	SECTION("datatype = int8_t") { test_Reg_xorb<int8_t>(); }
 78 | #endif // MIPP_BW
 79 | }
 80 | #endif // !MIPP_SVE_LS
 81 | 
 82 | // Runs mipp::xorb<N> on raw masks built from random bools and checks the mask through mipp::toreg<N>.
 83 | template <typename T> void test_msk_xorb()
 84 | {
 85 | 	constexpr int N = mipp::N<T>();
 86 | 	bool bits1[N], bits2[N];
 87 | 	std::mt19937 rng;
 88 | 	std::uniform_int_distribution<uint16_t> coin(0, 1);
 89 | 
 90 | 	for (int round = 0; round < 100; round++)
 91 | 	{
 92 | 		// per lane, bits1 is drawn before bits2; both arrays are then shuffled with the same engine
 93 | 		for (int k = 0; k < N; k++)
 94 | 		{
 95 | 			bits1[k] = coin(rng) != 0;
 96 | 			bits2[k] = coin(rng) != 0;
 97 | 		}
 98 | 		std::shuffle(bits1, bits1 + N, rng);
 99 | 		std::shuffle(bits2, bits2 + N, rng);
100 | 
101 | 		mipp::msk lhs = mipp::set<N>(bits1);
102 | 		mipp::msk rhs = mipp::set<N>(bits2);
103 | 		mipp::msk differ = mipp::xorb<N>(lhs, rhs);
104 | 		mipp::reg r = mipp::toreg<N>(differ);
105 | 
106 | 		// an expected-true lane only has to be non-zero; an expected-false lane must be exactly zero
107 | 		for (auto i = 0; i < N; i++)
108 | 		{
109 | 			const bool res = bits1[i] ^ bits2[i];
110 | 
111 | 			if (!res)
112 | 				REQUIRE(mipp::get<T>(r, i) == (T)res);
113 | 			else
114 | 				REQUIRE(mipp::get<T>(r, i) != (T)0);
115 | 		}
116 | 	}
117 | }
118 | 
119 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS))
120 | TEST_CASE("Binary xor - mipp::msk", "[mipp::xorb]")
121 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
122 | #ifdef MIPP_64BIT
123 | 	SECTION("datatype = int64_t") { test_msk_xorb<int64_t>(); }
124 | #endif // MIPP_64BIT
125 | 	SECTION("datatype = int32_t") { test_msk_xorb<int32_t>(); }
126 | #ifdef MIPP_BW
127 | 	SECTION("datatype = int16_t") { test_msk_xorb<int16_t>(); }
128 | 	SECTION("datatype = int8_t") { test_msk_xorb<int8_t>(); }
129 | #endif // MIPP_BW
130 | }
131 | #endif // !(MIPP_NO || MIPP_SVE_LS)
132 | 
133 | // Applies `^` to two mipp::Msk<N> built from random bools; every lane must equal the scalar result.
134 | template <typename T> void test_Msk_xorb()
135 | {
136 | 	constexpr int N = mipp::N<T>();
137 | 	bool bits1[N], bits2[N];
138 | 	std::mt19937 rng;
139 | 	std::uniform_int_distribution<uint16_t> coin(0, 1);
140 | 
141 | 	for (int round = 0; round < 100; round++)
142 | 	{
143 | 		// per lane, bits1 is drawn before bits2; both arrays are then shuffled with the same engine
144 | 		for (int k = 0; k < N; k++)
145 | 		{
146 | 			bits1[k] = coin(rng) != 0;
147 | 			bits2[k] = coin(rng) != 0;
148 | 		}
149 | 		std::shuffle(bits1, bits1 + N, rng);
150 | 		std::shuffle(bits2, bits2 + N, rng);
151 | 
152 | 		mipp::Msk<N> lhs = bits1;
153 | 		mipp::Msk<N> rhs = bits2;
154 | 		mipp::Msk<N> m3 = lhs ^ rhs;
155 | 
156 | 		for (auto i = 0; i < N; i++)
157 | 		{
158 | 			bool res = bits1[i] ^ bits2[i];
159 | 			REQUIRE(m3[i] == res);
160 | 		}
161 | 	}
162 | }
163 | 
164 | #ifndef MIPP_SVE_LS
165 | TEST_CASE("Binary xor - mipp::Msk", "[mipp::xorb]")
166 | { // one SECTION per element type; int64_t needs MIPP_64BIT, int16_t/int8_t need MIPP_BW
167 | #ifdef MIPP_64BIT
168 | 	SECTION("datatype = int64_t") { test_Msk_xorb<int64_t>(); }
169 | #endif // MIPP_64BIT
170 | 	SECTION("datatype = int32_t") { test_Msk_xorb<int32_t>(); }
171 | #ifdef MIPP_BW
172 | 	SECTION("datatype = int16_t") { test_Msk_xorb<int16_t>(); }
173 | 	SECTION("datatype = int8_t") { test_Msk_xorb<int8_t>(); }
174 | #endif // MIPP_BW
175 | }
176 | #endif // !MIPP_SVE_LS
177 | 
178 | 


--------------------------------------------------------------------------------
/tests/src/logical_comparisons/cmpeq.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | // Runs mipp::cmpeq<T> on 100 reshuffles of two 1..N arrays and checks the mask through mipp::toreg.
 10 | template <typename T> void test_reg_cmpeq()
 11 | {
 12 | 	constexpr int n_lanes = mipp::N<T>();
 13 | 	T lhs[n_lanes], rhs[n_lanes];
 14 | 	std::iota(lhs, lhs + n_lanes, (T)1);
 15 | 	std::iota(rhs, rhs + n_lanes, (T)1);
 16 | 	std::mt19937 rng; // one engine for all rounds: each round reshuffles the previous permutation
 17 | 	for (int round = 0; round < 100; round++)
 18 | 	{
 19 | 		std::shuffle(lhs, lhs + n_lanes, rng);
 20 | 		std::shuffle(rhs, rhs + n_lanes, rng);
 21 | 
 22 | 		mipp::reg a = mipp::load<T>(lhs);
 23 | 		mipp::reg b = mipp::load<T>(rhs);
 24 | 		mipp::msk eq = mipp::cmpeq<T>(a, b);
 25 | 		mipp::reg r3 = mipp::toreg<n_lanes>(eq);
 26 | 
 27 | 		// an equal pair only has to give a non-zero lane; a differing pair must give exactly zero
 28 | 		for (int i = 0; i < n_lanes; i++)
 29 | 		{
 30 | 			if (lhs[i] != rhs[i])
 31 | 				REQUIRE(mipp::get<T>(r3, i) == (T)0);
 32 | 			else
 33 | 				REQUIRE(mipp::get<T>(r3, i) != (T)0);
 34 | 		}
 35 | 	}
 36 | }
 37 | 
 38 | #if !defined(MIPP_NO)
 39 | TEST_CASE("Compare equal - mipp::reg", "[mipp::cmpeq]")
 40 | {
 41 | #ifdef MIPP_64BIT
 42 | 	SECTION("datatype = double") { test_reg_cmpeq<double>(); }
 43 | #endif // MIPP_64BIT
 44 | 	SECTION("datatype = float") { test_reg_cmpeq<float>(); }
 45 | 	// int64_t/int32_t are left out when MIPP_AVX is defined without MIPP_AVX2 under an Intel compiler
 46 | #if !defined(MIPP_AVX) || defined(MIPP_AVX2) || !(defined(__INTEL_COMPILER) || defined(__ICL) || defined(__ICC))
 47 | #ifdef MIPP_64BIT
 48 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
 49 | 	SECTION("datatype = int64_t") { test_reg_cmpeq<int64_t>(); }
 50 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
 51 | #endif // MIPP_64BIT
 52 | 	SECTION("datatype = int32_t") { test_reg_cmpeq<int32_t>(); }
 53 | #endif
 54 | #ifdef MIPP_BW
 55 | 	SECTION("datatype = int16_t") { test_reg_cmpeq<int16_t>(); }
 56 | 	SECTION("datatype = int8_t") { test_reg_cmpeq<int8_t>(); }
 57 | #endif // MIPP_BW
 58 | }
 59 | #endif // !MIPP_NO
 60 | 
 61 | // Compares two mipp::Reg<T> with `==` on 100 reshuffles; each mask lane must match the scalar `==`.
 62 | template <typename T> void test_Reg_cmpeq()
 63 | {
 64 | 	constexpr int n_lanes = mipp::N<T>();
 65 | 	T inputs1[n_lanes], inputs2[n_lanes];
 66 | 	std::iota(inputs1, inputs1 + n_lanes, (T)1);
 67 | 	std::iota(inputs2, inputs2 + n_lanes, (T)1);
 68 | 	std::mt19937 rng; // one engine for all rounds: each round reshuffles the previous permutation
 69 | 	for (int round = 0; round < 100; round++)
 70 | 	{
 71 | 		std::shuffle(inputs1, inputs1 + n_lanes, rng);
 72 | 		std::shuffle(inputs2, inputs2 + n_lanes, rng);
 73 | 
 74 | 		mipp::Reg<T> a = inputs1;
 75 | 		mipp::Reg<T> b = inputs2;
 76 | 		mipp::Msk<n_lanes> m = a == b;
 77 | 
 78 | 		for (int i = 0; i < n_lanes; i++)
 79 | 			REQUIRE(m[i] == (inputs1[i] == inputs2[i]));
 80 | 	}
 81 | }
 82 | 
 83 | TEST_CASE("Compare equal - mipp::Reg", "[mipp::cmpeq]")
 84 | {
 85 | #ifdef MIPP_64BIT
 86 | 	SECTION("datatype = double") { test_Reg_cmpeq<double>(); }
 87 | #endif // MIPP_64BIT
 88 | 	SECTION("datatype = float") { test_Reg_cmpeq<float>(); }
 89 | 	// int64_t/int32_t are left out when MIPP_AVX is defined without MIPP_AVX2 under an Intel compiler
 90 | #if !defined(MIPP_AVX) || defined(MIPP_AVX2) || !(defined(__INTEL_COMPILER) || defined(__ICL) || defined(__ICC))
 91 | #ifdef MIPP_64BIT
 92 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
 93 | 	SECTION("datatype = int64_t") { test_Reg_cmpeq<int64_t>(); }
 94 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
 95 | #endif // MIPP_64BIT
 96 | 	SECTION("datatype = int32_t") { test_Reg_cmpeq<int32_t>(); }
 97 | #endif
 98 | #ifdef MIPP_BW
 99 | 	SECTION("datatype = int16_t") { test_Reg_cmpeq<int16_t>(); }
100 | 	SECTION("datatype = int8_t") { test_Reg_cmpeq<int8_t>(); }
101 | #endif // MIPP_BW
102 | }
103 | 


--------------------------------------------------------------------------------
/tests/src/logical_comparisons/cmpge.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename T> // T: element type under test
 10 | void test_reg_cmpge() // Checks mipp::cmpge<T> on raw mipp::reg registers against the scalar >=.
 11 | {
 12 | 	T inputs1[mipp::N<T>()], inputs2[mipp::N<T>()];
 13 | 	std::iota(inputs1, inputs1 + mipp::N<T>(), (T)1); // both arrays start as 1, 2, ..., N
 14 | 	std::iota(inputs2, inputs2 + mipp::N<T>(), (T)1);
 15 | 
 16 | 	std::mt19937 g; // default-seeded engine, so the shuffle sequence is repeatable
 17 | 	for (auto t = 0; t < 100; t++) // 100 rounds, each on freshly shuffled operands
 18 | 	{
 19 | 		std::shuffle(inputs1, inputs1 + mipp::N<T>(), g);
 20 | 		std::shuffle(inputs2, inputs2 + mipp::N<T>(), g);
 21 | 
 22 | 		mipp::reg r1 = mipp::load<T>(inputs1);
 23 | 		mipp::reg r2 = mipp::load<T>(inputs2);
 24 | 		mipp::msk m  = mipp::cmpge<T>(r1, r2);
 25 | 
 26 | 		mipp::reg r3 = mipp::toreg<mipp::N<T>()>(m); // mask converted to a register so lanes can be read with mipp::get
 27 | 
 28 | 		for (auto i = 0; i < mipp::N<T>(); i++)
 29 | 		{
 30 | 			if (inputs1[i] >= inputs2[i])
 31 | 				REQUIRE(mipp::get<T>(r3, i) != (T)0); // lane must be non-zero where the scalar comparison holds
 32 | 			else
 33 | 				REQUIRE(mipp::get<T>(r3, i) == (T)0); // and zero where it does not
 34 | 		}
 35 | 	}
 36 | }
 37 | 
 38 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // low-level mipp::reg test: compiled out when MIPP_NO or MIPP_SVE_LS is defined
 39 | TEST_CASE("Compare greater or equal - mipp::reg", "[mipp::cmpge]")
 40 | {
 41 | #if defined(MIPP_64BIT)
 42 | 	SECTION("datatype = double") { test_reg_cmpge<double>(); }
 43 | #endif // MIPP_64BIT
 44 | 	SECTION("datatype = float") { test_reg_cmpge<float>(); }
 45 | 
 46 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) // 32/64-bit integer sections: skipped only for MIPP_AVX with MIPP_INSTR_VERSION < 2
 47 | #if !defined(MIPP_SSE) // 64-bit integer sections are never built for MIPP_SSE
 48 | #if defined(MIPP_64BIT)
 49 | 	SECTION("datatype = int64_t") { test_reg_cmpge<int64_t>(); }
 50 | 	SECTION("datatype = uint64_t") { test_reg_cmpge<uint64_t>(); }
 51 | #endif // MIPP_64BIT
 52 | #endif // !MIPP_SSE
 53 | 	SECTION("datatype = int32_t") { test_reg_cmpge<int32_t>(); }
 54 | 	SECTION("datatype = uint32_t") { test_reg_cmpge<uint32_t>(); }
 55 | #endif // 32/64-bit integer sections
 56 | #if defined(MIPP_BW) // 8/16-bit sections are built only when MIPP_BW is defined
 57 | 	SECTION("datatype = int16_t") { test_reg_cmpge<int16_t>(); }
 58 | 	SECTION("datatype = uint16_t") { test_reg_cmpge<uint16_t>(); }
 59 | 	SECTION("datatype = int8_t") { test_reg_cmpge<int8_t>(); }
 60 | 	SECTION("datatype = uint8_t") { test_reg_cmpge<uint8_t>(); }
 61 | #endif // MIPP_BW
 62 | }
 63 | #endif // !MIPP_NO && !MIPP_SVE_LS
 64 | 
 65 | template <typename T> // T: element type under test
 66 | void test_Reg_cmpge() // Checks mipp::Reg<T> operator>= lane by lane against the scalar >=.
 67 | {
 68 | 	T inputs1[mipp::N<T>()], inputs2[mipp::N<T>()];
 69 | 	std::iota(inputs1, inputs1 + mipp::N<T>(), (T)1); // both arrays start as 1, 2, ..., N
 70 | 	std::iota(inputs2, inputs2 + mipp::N<T>(), (T)1);
 71 | 
 72 | 	std::mt19937 g; // default-seeded engine, so the shuffle sequence is repeatable
 73 | 	for (auto t = 0; t < 100; t++) // 100 rounds, each on freshly shuffled operands
 74 | 	{
 75 | 		std::shuffle(inputs1, inputs1 + mipp::N<T>(), g);
 76 | 		std::shuffle(inputs2, inputs2 + mipp::N<T>(), g);
 77 | 
 78 | 		mipp::Reg<T> r1 = inputs1;
 79 | 		mipp::Reg<T> r2 = inputs2;
 80 | 		mipp::Msk<mipp::N<T>()> m = r1 >= r2;
 81 | 
 82 | 		for (auto i = 0; i < mipp::N<T>(); i++)
 83 | 			REQUIRE(m[i] == (inputs1[i] >= inputs2[i])); // mask lane i must match the scalar result
 84 | 	}
 85 | }
 86 | 
 87 | #if !defined(MIPP_SVE_LS) // object-interface test: compiled out only when MIPP_SVE_LS is defined
 88 | TEST_CASE("Compare greater or equal - mipp::Reg", "[mipp::cmpge]")
 89 | {
 90 | #if defined(MIPP_64BIT)
 91 | 	SECTION("datatype = double") { test_Reg_cmpge<double>(); }
 92 | #endif // MIPP_64BIT
 93 | 	SECTION("datatype = float") { test_Reg_cmpge<float>(); }
 94 | 
 95 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) // 32/64-bit integer sections: skipped only for MIPP_AVX with MIPP_INSTR_VERSION < 2
 96 | #if !defined(MIPP_SSE) // 64-bit integer sections are never built for MIPP_SSE
 97 | #if defined(MIPP_64BIT)
 98 | 	SECTION("datatype = int64_t") { test_Reg_cmpge<int64_t>(); }
 99 | 	SECTION("datatype = uint64_t") { test_Reg_cmpge<uint64_t>(); }
100 | #endif // MIPP_64BIT
101 | #endif // !MIPP_SSE
102 | 	SECTION("datatype = int32_t") { test_Reg_cmpge<int32_t>(); }
103 | 	SECTION("datatype = uint32_t") { test_Reg_cmpge<uint32_t>(); }
104 | #endif // 32/64-bit integer sections
105 | #if defined(MIPP_BW) // 8/16-bit sections are built only when MIPP_BW is defined
106 | 	SECTION("datatype = int16_t") { test_Reg_cmpge<int16_t>(); }
107 | 	SECTION("datatype = uint16_t") { test_Reg_cmpge<uint16_t>(); }
108 | 	SECTION("datatype = int8_t") { test_Reg_cmpge<int8_t>(); }
109 | 	SECTION("datatype = uint8_t") { test_Reg_cmpge<uint8_t>(); }
110 | #endif // MIPP_BW
111 | }
112 | #endif // !MIPP_SVE_LS
113 | 


--------------------------------------------------------------------------------
/tests/src/logical_comparisons/cmpgt.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename T> // T: element type under test
 10 | void test_reg_cmpgt() // Checks mipp::cmpgt<T> on raw mipp::reg registers against the scalar >.
 11 | {
 12 | 	T inputs1[mipp::N<T>()], inputs2[mipp::N<T>()];
 13 | 	std::iota(inputs1, inputs1 + mipp::N<T>(), (T)1); // both arrays start as 1, 2, ..., N
 14 | 	std::iota(inputs2, inputs2 + mipp::N<T>(), (T)1);
 15 | 
 16 | 	std::mt19937 g; // default-seeded engine, so the shuffle sequence is repeatable
 17 | 	for (auto t = 0; t < 100; t++) // 100 rounds, each on freshly shuffled operands
 18 | 	{
 19 | 		std::shuffle(inputs1, inputs1 + mipp::N<T>(), g);
 20 | 		std::shuffle(inputs2, inputs2 + mipp::N<T>(), g);
 21 | 
 22 | 		mipp::reg r1 = mipp::load<T>(inputs1);
 23 | 		mipp::reg r2 = mipp::load<T>(inputs2);
 24 | 		mipp::msk m  = mipp::cmpgt<T>(r1, r2);
 25 | 
 26 | 		mipp::reg r3 = mipp::toreg<mipp::N<T>()>(m); // mask converted to a register so lanes can be read with mipp::get
 27 | 
 28 | 		for (auto i = 0; i < mipp::N<T>(); i++)
 29 | 		{
 30 | 			if (inputs1[i] > inputs2[i])
 31 | 				REQUIRE(mipp::get<T>(r3, i) != (T)0); // lane must be non-zero where the scalar comparison holds
 32 | 			else
 33 | 				REQUIRE(mipp::get<T>(r3, i) == (T)0); // and zero where it does not
 34 | 		}
 35 | 	}
 36 | }
 37 | 
 38 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // low-level mipp::reg test: compiled out when MIPP_NO or MIPP_SVE_LS is defined
 39 | TEST_CASE("Compare greater than - mipp::reg", "[mipp::cmpgt]")
 40 | {
 41 | #if defined(MIPP_64BIT)
 42 | 	SECTION("datatype = double") { test_reg_cmpgt<double>(); }
 43 | #endif // MIPP_64BIT
 44 | 	SECTION("datatype = float") { test_reg_cmpgt<float>(); }
 45 | 
 46 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) // 32/64-bit integer sections: skipped only for MIPP_AVX with MIPP_INSTR_VERSION < 2
 47 | #if !defined(MIPP_SSE) // 64-bit integer sections are never built for MIPP_SSE
 48 | #if defined(MIPP_64BIT)
 49 | 	SECTION("datatype = int64_t") { test_reg_cmpgt<int64_t>(); }
 50 | 	SECTION("datatype = uint64_t") { test_reg_cmpgt<uint64_t>(); }
 51 | #endif // MIPP_64BIT
 52 | #endif // !MIPP_SSE
 53 | 	SECTION("datatype = int32_t") { test_reg_cmpgt<int32_t>(); }
 54 | 	SECTION("datatype = uint32_t") { test_reg_cmpgt<uint32_t>(); }
 55 | #endif // 32/64-bit integer sections
 56 | #if defined(MIPP_BW) // 8/16-bit sections are built only when MIPP_BW is defined
 57 | 	SECTION("datatype = int16_t") { test_reg_cmpgt<int16_t>(); }
 58 | 	SECTION("datatype = uint16_t") { test_reg_cmpgt<uint16_t>(); }
 59 | 	SECTION("datatype = int8_t") { test_reg_cmpgt<int8_t>(); }
 60 | 	SECTION("datatype = uint8_t") { test_reg_cmpgt<uint8_t>(); }
 61 | #endif // MIPP_BW
 62 | }
 63 | #endif // !MIPP_NO && !MIPP_SVE_LS
 64 | 
 65 | template <typename T> // T: element type under test
 66 | void test_Reg_cmpgt() // Checks mipp::Reg<T> operator> lane by lane against the scalar >.
 67 | {
 68 | 	T inputs1[mipp::N<T>()], inputs2[mipp::N<T>()];
 69 | 	std::iota(inputs1, inputs1 + mipp::N<T>(), (T)1); // both arrays start as 1, 2, ..., N
 70 | 	std::iota(inputs2, inputs2 + mipp::N<T>(), (T)1);
 71 | 
 72 | 	std::mt19937 g; // default-seeded engine, so the shuffle sequence is repeatable
 73 | 	for (auto t = 0; t < 100; t++) // 100 rounds, each on freshly shuffled operands
 74 | 	{
 75 | 		std::shuffle(inputs1, inputs1 + mipp::N<T>(), g);
 76 | 		std::shuffle(inputs2, inputs2 + mipp::N<T>(), g);
 77 | 
 78 | 		mipp::Reg<T> r1 = inputs1;
 79 | 		mipp::Reg<T> r2 = inputs2;
 80 | 		mipp::Msk<mipp::N<T>()> m = r1 > r2;
 81 | 
 82 | 		for (auto i = 0; i < mipp::N<T>(); i++)
 83 | 			REQUIRE(m[i] == (inputs1[i] > inputs2[i])); // mask lane i must match the scalar result
 84 | 	}
 85 | }
 86 | 
 87 | #if !defined(MIPP_SVE_LS) // object-interface test: compiled out only when MIPP_SVE_LS is defined
 88 | TEST_CASE("Compare greater than - mipp::Reg", "[mipp::cmpgt]")
 89 | {
 90 | #if defined(MIPP_64BIT)
 91 | 	SECTION("datatype = double") { test_Reg_cmpgt<double>(); }
 92 | #endif // MIPP_64BIT
 93 | 	SECTION("datatype = float") { test_Reg_cmpgt<float>(); }
 94 | 
 95 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) // 32/64-bit integer sections: skipped only for MIPP_AVX with MIPP_INSTR_VERSION < 2
 96 | #if !defined(MIPP_SSE) // 64-bit integer sections are never built for MIPP_SSE
 97 | #if defined(MIPP_64BIT)
 98 | 	SECTION("datatype = int64_t") { test_Reg_cmpgt<int64_t>(); }
 99 | 	SECTION("datatype = uint64_t") { test_Reg_cmpgt<uint64_t>(); }
100 | #endif // MIPP_64BIT
101 | #endif // !MIPP_SSE
102 | 	SECTION("datatype = int32_t") { test_Reg_cmpgt<int32_t>(); }
103 | 	SECTION("datatype = uint32_t") { test_Reg_cmpgt<uint32_t>(); }
104 | #endif // 32/64-bit integer sections
105 | #if defined(MIPP_BW) // 8/16-bit sections are built only when MIPP_BW is defined
106 | 	SECTION("datatype = int16_t") { test_Reg_cmpgt<int16_t>(); }
107 | 	SECTION("datatype = uint16_t") { test_Reg_cmpgt<uint16_t>(); }
108 | 	SECTION("datatype = int8_t") { test_Reg_cmpgt<int8_t>(); }
109 | 	SECTION("datatype = uint8_t") { test_Reg_cmpgt<uint8_t>(); }
110 | #endif // MIPP_BW
111 | }
112 | #endif // !MIPP_SVE_LS
113 | 


--------------------------------------------------------------------------------
/tests/src/logical_comparisons/cmple.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename T> // T: element type under test
 10 | void test_reg_cmple() // Checks mipp::cmple<T> on raw mipp::reg registers against the scalar <=.
 11 | {
 12 | 	T inputs1[mipp::N<T>()], inputs2[mipp::N<T>()];
 13 | 	std::iota(inputs1, inputs1 + mipp::N<T>(), (T)1); // both arrays start as 1, 2, ..., N
 14 | 	std::iota(inputs2, inputs2 + mipp::N<T>(), (T)1);
 15 | 
 16 | 	std::mt19937 g; // default-seeded engine, so the shuffle sequence is repeatable
 17 | 	for (auto t = 0; t < 100; t++) // 100 rounds, each on freshly shuffled operands
 18 | 	{
 19 | 		std::shuffle(inputs1, inputs1 + mipp::N<T>(), g);
 20 | 		std::shuffle(inputs2, inputs2 + mipp::N<T>(), g);
 21 | 
 22 | 		mipp::reg r1 = mipp::load<T>(inputs1);
 23 | 		mipp::reg r2 = mipp::load<T>(inputs2);
 24 | 		mipp::msk m  = mipp::cmple<T>(r1, r2);
 25 | 
 26 | 		mipp::reg r3 = mipp::toreg<mipp::N<T>()>(m); // mask converted to a register so lanes can be read with mipp::get
 27 | 
 28 | 		for (auto i = 0; i < mipp::N<T>(); i++)
 29 | 		{
 30 | 			if (inputs1[i] <= inputs2[i])
 31 | 				REQUIRE(mipp::get<T>(r3, i) != (T)0); // lane must be non-zero where the scalar comparison holds
 32 | 			else
 33 | 				REQUIRE(mipp::get<T>(r3, i) == (T)0); // and zero where it does not
 34 | 		}
 35 | 	}
 36 | }
 37 | 
 38 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // low-level mipp::reg test: compiled out when MIPP_NO or MIPP_SVE_LS is defined
 39 | TEST_CASE("Compare lower or equal - mipp::reg", "[mipp::cmple]")
 40 | {
 41 | #if defined(MIPP_64BIT)
 42 | 	SECTION("datatype = double") { test_reg_cmple<double>(); }
 43 | #endif // MIPP_64BIT
 44 | 	SECTION("datatype = float") { test_reg_cmple<float>(); }
 45 | 
 46 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) // 32/64-bit integer sections: skipped only for MIPP_AVX with MIPP_INSTR_VERSION < 2
 47 | #if !defined(MIPP_SSE) // 64-bit integer sections are never built for MIPP_SSE
 48 | #if defined(MIPP_64BIT)
 49 | 	SECTION("datatype = int64_t") { test_reg_cmple<int64_t>(); }
 50 | 	SECTION("datatype = uint64_t") { test_reg_cmple<uint64_t>(); }
 51 | #endif // MIPP_64BIT
 52 | #endif // !MIPP_SSE
 53 | 	SECTION("datatype = int32_t") { test_reg_cmple<int32_t>(); }
 54 | 	SECTION("datatype = uint32_t") { test_reg_cmple<uint32_t>(); }
 55 | #endif // 32/64-bit integer sections
 56 | #if defined(MIPP_BW) // 8/16-bit sections are built only when MIPP_BW is defined
 57 | 	SECTION("datatype = int16_t") { test_reg_cmple<int16_t>(); }
 58 | 	SECTION("datatype = uint16_t") { test_reg_cmple<uint16_t>(); }
 59 | 	SECTION("datatype = int8_t") { test_reg_cmple<int8_t>(); }
 60 | 	SECTION("datatype = uint8_t") { test_reg_cmple<uint8_t>(); }
 61 | #endif // MIPP_BW
 62 | }
 63 | #endif // !MIPP_NO && !MIPP_SVE_LS
 64 | 
 65 | template <typename T> // T: element type under test
 66 | void test_Reg_cmple() // Checks mipp::Reg<T> operator<= lane by lane against the scalar <=.
 67 | {
 68 | 	T inputs1[mipp::N<T>()], inputs2[mipp::N<T>()];
 69 | 	std::iota(inputs1, inputs1 + mipp::N<T>(), (T)1); // both arrays start as 1, 2, ..., N
 70 | 	std::iota(inputs2, inputs2 + mipp::N<T>(), (T)1);
 71 | 
 72 | 	std::mt19937 g; // default-seeded engine, so the shuffle sequence is repeatable
 73 | 	for (auto t = 0; t < 100; t++) // 100 rounds, each on freshly shuffled operands
 74 | 	{
 75 | 		std::shuffle(inputs1, inputs1 + mipp::N<T>(), g);
 76 | 		std::shuffle(inputs2, inputs2 + mipp::N<T>(), g);
 77 | 
 78 | 		mipp::Reg<T> r1 = inputs1;
 79 | 		mipp::Reg<T> r2 = inputs2;
 80 | 		mipp::Msk<mipp::N<T>()> m = r1 <= r2;
 81 | 
 82 | 		for (auto i = 0; i < mipp::N<T>(); i++)
 83 | 			REQUIRE(m[i] == (inputs1[i] <= inputs2[i])); // mask lane i must match the scalar result
 84 | 	}
 85 | }
 86 | 
 87 | #if !defined(MIPP_SVE_LS) // object-interface test: compiled out only when MIPP_SVE_LS is defined
 88 | TEST_CASE("Compare lower or equal - mipp::Reg", "[mipp::cmple]")
 89 | {
 90 | #if defined(MIPP_64BIT)
 91 | 	SECTION("datatype = double") { test_Reg_cmple<double>(); }
 92 | #endif // MIPP_64BIT
 93 | 	SECTION("datatype = float") { test_Reg_cmple<float>(); }
 94 | 
 95 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) // 32/64-bit integer sections: skipped only for MIPP_AVX with MIPP_INSTR_VERSION < 2
 96 | #if !defined(MIPP_SSE) // 64-bit integer sections are never built for MIPP_SSE
 97 | #if defined(MIPP_64BIT)
 98 | 	SECTION("datatype = int64_t") { test_Reg_cmple<int64_t>(); }
 99 | 	SECTION("datatype = uint64_t") { test_Reg_cmple<uint64_t>(); }
100 | #endif // MIPP_64BIT
101 | #endif // !MIPP_SSE
102 | 	SECTION("datatype = int32_t") { test_Reg_cmple<int32_t>(); }
103 | 	SECTION("datatype = uint32_t") { test_Reg_cmple<uint32_t>(); }
104 | #endif // 32/64-bit integer sections
105 | #if defined(MIPP_BW) // 8/16-bit sections are built only when MIPP_BW is defined
106 | 	SECTION("datatype = int16_t") { test_Reg_cmple<int16_t>(); }
107 | 	SECTION("datatype = uint16_t") { test_Reg_cmple<uint16_t>(); }
108 | 	SECTION("datatype = int8_t") { test_Reg_cmple<int8_t>(); }
109 | 	SECTION("datatype = uint8_t") { test_Reg_cmple<uint8_t>(); }
110 | #endif // MIPP_BW
111 | }
112 | #endif // !MIPP_SVE_LS
113 | 


--------------------------------------------------------------------------------
/tests/src/logical_comparisons/cmplt.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename T> // T: element type under test
 10 | void test_reg_cmplt() // Checks mipp::cmplt<T> on raw mipp::reg registers against the scalar <.
 11 | {
 12 | 	T inputs1[mipp::N<T>()], inputs2[mipp::N<T>()];
 13 | 	std::iota(inputs1, inputs1 + mipp::N<T>(), (T)1); // both arrays start as 1, 2, ..., N
 14 | 	std::iota(inputs2, inputs2 + mipp::N<T>(), (T)1);
 15 | 
 16 | 	std::mt19937 g; // default-seeded engine, so the shuffle sequence is repeatable
 17 | 	for (auto t = 0; t < 100; t++) // 100 rounds, each on freshly shuffled operands
 18 | 	{
 19 | 		std::shuffle(inputs1, inputs1 + mipp::N<T>(), g);
 20 | 		std::shuffle(inputs2, inputs2 + mipp::N<T>(), g);
 21 | 
 22 | 		mipp::reg r1 = mipp::load<T>(inputs1);
 23 | 		mipp::reg r2 = mipp::load<T>(inputs2);
 24 | 		mipp::msk m  = mipp::cmplt<T>(r1, r2);
 25 | 
 26 | 		mipp::reg r3 = mipp::toreg<mipp::N<T>()>(m); // mask converted to a register so lanes can be read with mipp::get
 27 | 
 28 | 		for (auto i = 0; i < mipp::N<T>(); i++)
 29 | 		{
 30 | 			if (inputs1[i] < inputs2[i])
 31 | 				REQUIRE(mipp::get<T>(r3, i) != (T)0); // lane must be non-zero where the scalar comparison holds
 32 | 			else
 33 | 				REQUIRE(mipp::get<T>(r3, i) == (T)0); // and zero where it does not
 34 | 		}
 35 | 	}
 36 | }
 37 | 
 38 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // low-level mipp::reg test: compiled out when MIPP_NO or MIPP_SVE_LS is defined
 39 | TEST_CASE("Compare lower than - mipp::reg", "[mipp::cmplt]")
 40 | {
 41 | #if defined(MIPP_64BIT)
 42 | 	SECTION("datatype = double") { test_reg_cmplt<double>(); }
 43 | #endif // MIPP_64BIT
 44 | 	SECTION("datatype = float") { test_reg_cmplt<float>(); }
 45 | 
 46 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) // 32/64-bit integer sections: skipped only for MIPP_AVX with MIPP_INSTR_VERSION < 2
 47 | #if !defined(MIPP_SSE) // 64-bit integer sections are never built for MIPP_SSE
 48 | #if defined(MIPP_64BIT)
 49 | 	SECTION("datatype = int64_t") { test_reg_cmplt<int64_t>(); }
 50 | 	SECTION("datatype = uint64_t") { test_reg_cmplt<uint64_t>(); }
 51 | #endif // MIPP_64BIT
 52 | #endif // !MIPP_SSE
 53 | 	SECTION("datatype = int32_t") { test_reg_cmplt<int32_t>(); }
 54 | 	SECTION("datatype = uint32_t") { test_reg_cmplt<uint32_t>(); }
 55 | #endif // 32/64-bit integer sections
 56 | #if defined(MIPP_BW) // 8/16-bit sections are built only when MIPP_BW is defined
 57 | 	SECTION("datatype = int16_t") { test_reg_cmplt<int16_t>(); }
 58 | 	SECTION("datatype = uint16_t") { test_reg_cmplt<uint16_t>(); }
 59 | 	SECTION("datatype = int8_t") { test_reg_cmplt<int8_t>(); }
 60 | 	SECTION("datatype = uint8_t") { test_reg_cmplt<uint8_t>(); }
 61 | #endif // MIPP_BW
 62 | }
 63 | #endif // !MIPP_NO && !MIPP_SVE_LS
 64 | 
 65 | template <typename T> // T: element type under test
 66 | void test_Reg_cmplt() // Checks mipp::Reg<T> operator< lane by lane against the scalar <.
 67 | {
 68 | 	T inputs1[mipp::N<T>()], inputs2[mipp::N<T>()];
 69 | 	std::iota(inputs1, inputs1 + mipp::N<T>(), (T)1); // both arrays start as 1, 2, ..., N
 70 | 	std::iota(inputs2, inputs2 + mipp::N<T>(), (T)1);
 71 | 
 72 | 	std::mt19937 g; // default-seeded engine, so the shuffle sequence is repeatable
 73 | 	for (auto t = 0; t < 100; t++) // 100 rounds, each on freshly shuffled operands
 74 | 	{
 75 | 		std::shuffle(inputs1, inputs1 + mipp::N<T>(), g);
 76 | 		std::shuffle(inputs2, inputs2 + mipp::N<T>(), g);
 77 | 
 78 | 		mipp::Reg<T> r1 = inputs1;
 79 | 		mipp::Reg<T> r2 = inputs2;
 80 | 		mipp::Msk<mipp::N<T>()> m = r1 < r2;
 81 | 
 82 | 		for (auto i = 0; i < mipp::N<T>(); i++)
 83 | 			REQUIRE(m[i] == (inputs1[i] < inputs2[i])); // mask lane i must match the scalar result
 84 | 	}
 85 | }
 86 | 
 87 | #if !defined(MIPP_SVE_LS) // object-interface test: compiled out only when MIPP_SVE_LS is defined
 88 | TEST_CASE("Compare lower than - mipp::Reg", "[mipp::cmplt]")
 89 | {
 90 | #if defined(MIPP_64BIT)
 91 | 	SECTION("datatype = double") { test_Reg_cmplt<double>(); }
 92 | #endif // MIPP_64BIT
 93 | 	SECTION("datatype = float") { test_Reg_cmplt<float>(); }
 94 | 
 95 | #if !defined(MIPP_AVX) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) // 32/64-bit integer sections: skipped only for MIPP_AVX with MIPP_INSTR_VERSION < 2
 96 | #if !defined(MIPP_SSE) // 64-bit integer sections are never built for MIPP_SSE
 97 | #if defined(MIPP_64BIT)
 98 | 	SECTION("datatype = int64_t") { test_Reg_cmplt<int64_t>(); }
 99 | 	SECTION("datatype = uint64_t") { test_Reg_cmplt<uint64_t>(); }
100 | #endif // MIPP_64BIT
101 | #endif // !MIPP_SSE
102 | 	SECTION("datatype = int32_t") { test_Reg_cmplt<int32_t>(); }
103 | 	SECTION("datatype = uint32_t") { test_Reg_cmplt<uint32_t>(); }
104 | #endif // 32/64-bit integer sections
105 | #if defined(MIPP_BW) // 8/16-bit sections are built only when MIPP_BW is defined
106 | 	SECTION("datatype = int16_t") { test_Reg_cmplt<int16_t>(); }
107 | 	SECTION("datatype = uint16_t") { test_Reg_cmplt<uint16_t>(); }
108 | 	SECTION("datatype = int8_t") { test_Reg_cmplt<int8_t>(); }
109 | 	SECTION("datatype = uint8_t") { test_Reg_cmplt<uint8_t>(); }
110 | #endif // MIPP_BW
111 | }
112 | #endif // !MIPP_SVE_LS
113 | 


--------------------------------------------------------------------------------
/tests/src/logical_comparisons/cmpneq.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename T> // T: element type under test
 10 | void test_reg_cmpneq() // Checks mipp::cmpneq<T> on raw mipp::reg registers against the scalar !=.
 11 | {
 12 | 	T inputs1[mipp::N<T>()], inputs2[mipp::N<T>()];
 13 | 	std::iota(inputs1, inputs1 + mipp::N<T>(), (T)1); // both arrays start as 1, 2, ..., N
 14 | 	std::iota(inputs2, inputs2 + mipp::N<T>(), (T)1);
 15 | 
 16 | 	std::mt19937 g; // default-seeded engine, so the shuffle sequence is repeatable
 17 | 	for (auto t = 0; t < 100; t++) // 100 rounds, each on freshly shuffled operands
 18 | 	{
 19 | 		std::shuffle(inputs1, inputs1 + mipp::N<T>(), g);
 20 | 		std::shuffle(inputs2, inputs2 + mipp::N<T>(), g);
 21 | 
 22 | 		mipp::reg r1 = mipp::load<T>(inputs1);
 23 | 		mipp::reg r2 = mipp::load<T>(inputs2);
 24 | 		mipp::msk m  = mipp::cmpneq<T>(r1, r2);
 25 | 
 26 | 		mipp::reg r3 = mipp::toreg<mipp::N<T>()>(m); // mask converted to a register so lanes can be read with mipp::get
 27 | 
 28 | 		for (auto i = 0; i < mipp::N<T>(); i++)
 29 | 		{
 30 | 			if (inputs1[i] != inputs2[i])
 31 | 				REQUIRE(mipp::get<T>(r3, i) != (T)0); // lane must be non-zero where the scalar comparison holds
 32 | 			else
 33 | 				REQUIRE(mipp::get<T>(r3, i) == (T)0); // and zero where it does not
 34 | 		}
 35 | 	}
 36 | }
 37 | 
 38 | #ifndef MIPP_NO // low-level mipp::reg test: compiled out when MIPP_NO is defined
 39 | TEST_CASE("Compare not equal - mipp::reg", "[mipp::cmpneq]")
 40 | {
 41 | #if defined(MIPP_64BIT)
 42 | 	SECTION("datatype = double") { test_reg_cmpneq<double>(); }
 43 | #endif // MIPP_64BIT
 44 | 	SECTION("datatype = float") { test_reg_cmpneq<float>(); }
 45 | 
 46 | #if !defined(MIPP_AVX) || defined(MIPP_AVX2) || (defined(MIPP_AVX) && !defined(__INTEL_COMPILER) && !defined(__ICL) && !defined(__ICC)) // 32/64-bit integer sections: skipped only for MIPP_AVX without MIPP_AVX2 when an Intel compiler macro is defined
 47 | #if defined(MIPP_64BIT)
 48 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) // on MIPP_SSE the int64_t section needs MIPP_INSTR_VERSION >= 41
 49 | 	SECTION("datatype = int64_t") { test_reg_cmpneq<int64_t>(); }
 50 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
 51 | #endif // MIPP_64BIT
 52 | 	SECTION("datatype = int32_t") { test_reg_cmpneq<int32_t>(); }
 53 | #endif // 32/64-bit integer sections
 54 | #if defined(MIPP_BW) // 8/16-bit sections are built only when MIPP_BW is defined
 55 | 	SECTION("datatype = int16_t") { test_reg_cmpneq<int16_t>(); }
 56 | 	SECTION("datatype = int8_t") { test_reg_cmpneq<int8_t>(); }
 57 | #endif // MIPP_BW
 58 | }
 59 | #endif // !MIPP_NO
 60 | 
 61 | template <typename T> // T: element type under test
 62 | void test_Reg_cmpneq() // Checks mipp::Reg<T> operator!= lane by lane against the scalar !=.
 63 | {
 64 | 	T inputs1[mipp::N<T>()], inputs2[mipp::N<T>()];
 65 | 	std::iota(inputs1, inputs1 + mipp::N<T>(), (T)1); // both arrays start as 1, 2, ..., N
 66 | 	std::iota(inputs2, inputs2 + mipp::N<T>(), (T)1);
 67 | 
 68 | 	std::mt19937 g; // default-seeded engine, so the shuffle sequence is repeatable
 69 | 	for (auto t = 0; t < 100; t++) // 100 rounds, each on freshly shuffled operands
 70 | 	{
 71 | 		std::shuffle(inputs1, inputs1 + mipp::N<T>(), g);
 72 | 		std::shuffle(inputs2, inputs2 + mipp::N<T>(), g);
 73 | 
 74 | 		mipp::Reg<T> r1 = inputs1;
 75 | 		mipp::Reg<T> r2 = inputs2;
 76 | 		mipp::Msk<mipp::N<T>()> m = r1 != r2;
 77 | 
 78 | 		for (auto i = 0; i < mipp::N<T>(); i++)
 79 | 			REQUIRE(m[i] == (inputs1[i] != inputs2[i])); // mask lane i must match the scalar result
 80 | 	}
 81 | }
 82 | 
 83 | TEST_CASE("Compare not equal - mipp::Reg", "[mipp::cmpneq]") // object-interface (mipp::Reg) variant; not guarded by MIPP_NO
 84 | {
 85 | #if defined(MIPP_64BIT)
 86 | 	SECTION("datatype = double") { test_Reg_cmpneq<double>(); }
 87 | #endif // MIPP_64BIT
 88 | 	SECTION("datatype = float") { test_Reg_cmpneq<float>(); }
 89 | 
 90 | #if !defined(MIPP_AVX) || defined(MIPP_AVX2) || (defined(MIPP_AVX) && !defined(__INTEL_COMPILER) && !defined(__ICL) && !defined(__ICC)) // 32/64-bit integer sections: skipped only for MIPP_AVX without MIPP_AVX2 when an Intel compiler macro is defined
 91 | #if defined(MIPP_64BIT)
 92 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 41) // on MIPP_SSE the int64_t section needs MIPP_INSTR_VERSION >= 41
 93 | 	SECTION("datatype = int64_t") { test_Reg_cmpneq<int64_t>(); }
 94 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
 95 | #endif // MIPP_64BIT
 96 | 	SECTION("datatype = int32_t") { test_Reg_cmpneq<int32_t>(); }
 97 | #endif // 32/64-bit integer sections
 98 | #if defined(MIPP_BW) // 8/16-bit sections are built only when MIPP_BW is defined
 99 | 	SECTION("datatype = int16_t") { test_Reg_cmpneq<int16_t>(); }
100 | 	SECTION("datatype = int8_t") { test_Reg_cmpneq<int8_t>(); }
101 | #endif // MIPP_BW
102 | }
103 | 


--------------------------------------------------------------------------------
/tests/src/main.cpp:
--------------------------------------------------------------------------------
 1 | #define CATCH_CONFIG_RUNNER
 2 | #include <catch.hpp>
 3 | #include <iostream>
 4 | #include <mipp.h>
 5 | 
 6 | int main(int argc, char* argv[]) // Test runner entry point: prints the MIPP configuration, then runs the Catch session.
 7 | {
 8 | 	std::cout << "MIPP tests" << std::endl;
 9 | 	std::cout << "----------" << std::endl << std::endl;
10 | 	// Banner: one line per MIPP configuration constant.
11 | 	std::cout << "Instr. type:       " << mipp::InstructionType                  << std::endl;
12 | 	std::cout << "Instr. full type:  " << mipp::InstructionFullType              << std::endl;
13 | 	std::cout << "Instr. version:    " << mipp::InstructionVersion               << std::endl;
14 | 	std::cout << "Instr. size:       " << mipp::RegisterSizeBit       << " bits" << std::endl;
15 | 	std::cout << "Instr. lanes:      " << mipp::Lanes                            << std::endl;
16 | 	std::cout << "64-bit support:    " << (mipp::Support64Bit    ? "yes" : "no") << std::endl;
17 | 	std::cout << "Byte/word support: " << (mipp::SupportByteWord ? "yes" : "no") << std::endl;
18 | 	auto ext = mipp::InstructionExtensions();
19 | 	if (ext.size() > 0) // the extensions line is omitted entirely when the list is empty
20 | 	{
21 | 		std::cout << "Instr. extensions: {";
22 | 		for (auto i = 0; i < (int)ext.size(); i++)
23 | 			std::cout << ext[i] << (i < ((int)ext.size() -1) ? ", " : ""); // ", " between entries, nothing after the last one
24 | 		std::cout << "}" << std::endl;
25 | 	}
26 | 	std::cout << std::endl;
27 | 	// Hand the command line to Catch; its return value becomes the process exit code.
28 | 	int result = Catch::Session().run(argc, argv);
29 | 
30 | 	return result;
31 | }
32 | 


--------------------------------------------------------------------------------
/tests/src/math_functions/sincos.cpp:
--------------------------------------------------------------------------------
 1 | #include <exception>
 2 | #include <algorithm>
 3 | #include <numeric>
 4 | #include <random>
 5 | #include <cmath>
 6 | #include <mipp.h>
 7 | #include <catch.hpp>
 8 | 
 9 | template <typename T> // T: element type under test
10 | void test_reg_sincos() // Checks mipp::sincos<T> on raw mipp::reg registers against std::sin / std::cos.
11 | {
12 | 	T inputs1[mipp::N<T>()];
13 | 	std::iota(inputs1, inputs1 + mipp::N<T>(), (T)1); // arguments are 1, 2, ..., N
14 | 
15 | 	std::mt19937 g; // default-seeded engine, so the shuffle is repeatable
16 | 	std::shuffle(inputs1, inputs1 + mipp::N<T>(), g); // single shuffle: this test runs one round only
17 | 
18 | 	mipp::reg r1 = mipp::load<T>(inputs1);
19 | 	mipp::reg r2, r3; // outputs: r2 is compared to the sine below, r3 to the cosine
20 | 	mipp::sincos<T>(r1, r2, r3);
21 | 
22 | 	for (auto i = 0; i < mipp::N<T>(); i++)
23 | 	{
24 | 		T res1 = std::sin(inputs1[i]);
25 | 		T res2 = std::cos(inputs1[i]);
26 | 
27 | 		// REQUIRE(mipp::get<T>(r2, i) == Approx(res1).epsilon(0.001));
28 | 		REQUIRE(mipp::get<T>(r2, i) == Approx(res1)); // approximate comparison, no explicit epsilon set
29 | 
30 | 		// REQUIRE(mipp::get<T>(r3, i) == Approx(res2).epsilon(0.001));
31 | 		REQUIRE(mipp::get<T>(r3, i) == Approx(res2));
32 | 	}
33 | }
34 | 
35 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // low-level mipp::reg test: compiled out when MIPP_NO or MIPP_SVE_LS is defined
36 | TEST_CASE("Sine & Cosine - mipp::reg", "[mipp::sincos]")
37 | {
38 | 	SECTION("datatype = float" ) { test_reg_sincos<float>(); }
39 | #if defined(MIPP_64BIT) && (defined(__INTEL_COMPILER) || defined(__ICL) || defined(__ICC)) // double section needs MIPP_64BIT and one of the Intel compiler macros
40 | 	SECTION("datatype = double") { test_reg_sincos<double>(); }
41 | #endif // MIPP_64BIT && Intel compiler
42 | }
43 | #endif // !MIPP_NO && !MIPP_SVE_LS
44 | 
45 | template <typename T> // T: element type under test
46 | void test_Reg_sincos() // Checks mipp::sincos on mipp::Reg<T> objects against std::sin / std::cos.
47 | {
48 | 	T inputs1[mipp::N<T>()];
49 | 	std::iota(inputs1, inputs1 + mipp::N<T>(), (T)1); // arguments are 1, 2, ..., N
50 | 
51 | 	std::mt19937 g; // default-seeded engine, so the shuffle is repeatable
52 | 	std::shuffle(inputs1, inputs1 + mipp::N<T>(), g); // single shuffle: this test runs one round only
53 | 
54 | 	mipp::Reg<T> r1 = inputs1;
55 | 	mipp::Reg<T> r2, r3; // outputs: r2 is compared to the sine below, r3 to the cosine
56 | 	mipp::sincos(r1, r2, r3);
57 | 
58 | 	for (auto i = 0; i < mipp::N<T>(); i++)
59 | 	{
60 | 		T res1 = std::sin(inputs1[i]);
61 | 		T res2 = std::cos(inputs1[i]);
62 | 
63 | 		// REQUIRE(r2[i] == Approx(res1).epsilon(0.001));
64 | 		REQUIRE(r2[i] == Approx(res1)); // approximate comparison, no explicit epsilon set
65 | 
66 | 		// REQUIRE(r3[i] == Approx(res2).epsilon(0.001));
67 | 		REQUIRE(r3[i] == Approx(res2));
68 | 	}
69 | }
70 | 
71 | #if !defined(MIPP_SVE_LS) // object-interface test: compiled out only when MIPP_SVE_LS is defined
72 | TEST_CASE("Sine & Cosine - mipp::Reg", "[mipp::sincos]")
73 | {
74 | 	SECTION("datatype = float" ) { test_Reg_sincos<float>(); }
75 | #if defined(MIPP_64BIT) && (defined(__INTEL_COMPILER) || defined(__ICL) || defined(__ICC)) // double section needs MIPP_64BIT and one of the Intel compiler macros
76 | 	SECTION("datatype = double") { test_Reg_sincos<double>(); }
77 | #endif // MIPP_64BIT && Intel compiler
78 | }
79 | #endif // !MIPP_SVE_LS
80 | 


--------------------------------------------------------------------------------
/tests/src/memory_operations/blend.cpp:
--------------------------------------------------------------------------------
 1 | #include <exception>
 2 | #include <algorithm>
 3 | #include <numeric>
 4 | #include <random>
 5 | #include <cmath>
 6 | #include <mipp.h>
 7 | #include <catch.hpp>
 8 | 
 9 | template <typename T> // T: element type under test
10 | void test_reg_blend() // Checks mipp::blend<T> on raw registers: lane i must be inputs1[i] where mask[i] is true, else inputs2[i].
11 | {
12 | 	constexpr int N = mipp::N<T>();
13 | 	T inputs1[N], inputs2[N];
14 | 	std::iota(inputs1, inputs1 + N, (T)0); // inputs1 = 0 .. N-1
15 | 	std::iota(inputs2, inputs2 + N, (T)N); // inputs2 = N .. 2N-1, disjoint from inputs1 so each output lane shows its source
16 | 
17 | 	bool mask[N];
18 | 	std::fill(mask,       mask + N/2, true ); // first N/2 entries true, the rest false ...
19 | 	std::fill(mask + N/2, mask + N,   false);
20 | 
21 | 	std::mt19937 g; // default-seeded engine, so the shuffle sequence is repeatable
22 | 	for (auto t = 0; t < 100; t++) // 100 rounds, each with a re-shuffled mask
23 | 	{
24 | 		std::shuffle(mask, mask + N, g); // ... then permuted, which keeps the true/false counts unchanged
25 | 
26 | 		mipp::reg r1 = mipp::load<T>(inputs1);
27 | 		mipp::reg r2 = mipp::load<T>(inputs2);
28 | 		mipp::msk m  = mipp::set <N>(mask   );
29 | 
30 | 		mipp::reg ri = mipp::blend<T>(r1, r2, m);
31 | 
32 | 		for (auto i = 0; i < N; i++)
33 | 			REQUIRE(mipp::get<T>(ri, i) == (mask[i] ? inputs1[i] : inputs2[i]));
34 | 	}
35 | }
36 | 
37 | #ifndef MIPP_NO // low-level mipp::reg test: compiled out when MIPP_NO is defined
38 | TEST_CASE("Blend - mipp::reg", "[mipp::blend]")
39 | {
40 | #if defined(MIPP_64BIT)
41 | 	SECTION("datatype = double") { test_reg_blend<double>(); }
42 | #endif // MIPP_64BIT
43 | 	SECTION("datatype = float") { test_reg_blend<float>(); }
44 | 
45 | #if defined(MIPP_64BIT)
46 | 	SECTION("datatype = int64_t") { test_reg_blend<int64_t>(); }
47 | #endif // MIPP_64BIT
48 | 	SECTION("datatype = int32_t") { test_reg_blend<int32_t>(); }
49 | #if defined(MIPP_BW) // 8/16-bit sections are built only when MIPP_BW is defined
50 | 	SECTION("datatype = int16_t") { test_reg_blend<int16_t>(); }
51 | 	SECTION("datatype = int8_t") { test_reg_blend<int8_t>(); }
52 | #endif // MIPP_BW
53 | }
54 | #endif // !MIPP_NO
55 | 
56 | template <typename T> // T: element type under test
57 | void test_Reg_blend() // Checks mipp::blend on mipp::Reg<T>/mipp::Msk<N> objects: lane i must be inputs1[i] where mask[i] is true, else inputs2[i].
58 | {
59 | 	constexpr int N = mipp::N<T>();
60 | 	T inputs1[N], inputs2[N];
61 | 	std::iota(inputs1, inputs1 + N, (T)0); // inputs1 = 0 .. N-1
62 | 	std::iota(inputs2, inputs2 + N, (T)N); // inputs2 = N .. 2N-1, disjoint from inputs1 so each output lane shows its source
63 | 
64 | 	bool mask[N];
65 | 	std::fill(mask,       mask + N/2, true ); // first N/2 entries true, the rest false ...
66 | 	std::fill(mask + N/2, mask + N,   false);
67 | 
68 | 	std::mt19937 g; // default-seeded engine, so the shuffle sequence is repeatable
69 | 	for (auto t = 0; t < 100; t++) // 100 rounds, each with a re-shuffled mask
70 | 	{
71 | 		std::shuffle(mask, mask + N, g); // ... then permuted, which keeps the true/false counts unchanged
72 | 
73 | 		mipp::Reg<T> r1 = inputs1;
74 | 		mipp::Reg<T> r2 = inputs2;
75 | 		mipp::Msk<N> m  = mask;
76 | 
77 | 		mipp::Reg<T> ri = mipp::blend(r1, r2, m);
78 | 
79 | 		for (auto i = 0; i < N; i++)
80 | 			REQUIRE(ri[i] == (mask[i] ? inputs1[i] : inputs2[i]));
81 | 	}
82 | }
83 | 
84 | TEST_CASE("Blend - mipp::Reg", "[mipp::blend]") // object-interface (mipp::Reg) variant; not guarded by MIPP_NO
85 | {
86 | #if defined(MIPP_64BIT)
87 | 	SECTION("datatype = double") { test_Reg_blend<double>(); }
88 | #endif // MIPP_64BIT
89 | 	SECTION("datatype = float") { test_Reg_blend<float>(); }
90 | 
91 | #if defined(MIPP_64BIT)
92 | 	SECTION("datatype = int64_t") { test_Reg_blend<int64_t>(); }
93 | #endif // MIPP_64BIT
94 | 	SECTION("datatype = int32_t") { test_Reg_blend<int32_t>(); }
95 | #if defined(MIPP_BW) // 8/16-bit sections are built only when MIPP_BW is defined
96 | 	SECTION("datatype = int16_t") { test_Reg_blend<int16_t>(); }
97 | 	SECTION("datatype = int8_t") { test_Reg_blend<int8_t>(); }
98 | #endif // MIPP_BW
99 | }


--------------------------------------------------------------------------------
/tests/src/memory_operations/combine.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename T> // T: element type under test
 10 | void test_reg_combine() // Round-trip check: splitting a register with low/high and re-joining with combine must give back the input.
 11 | {
 12 | 	T inputs[mipp::N<T>()];
 13 | 
 14 | 	std::iota(inputs, inputs + mipp::N<T>(), (T)0); // inputs = 0 .. N-1
 15 | 
 16 | 	mipp::reg r1 = mipp::load<T>(inputs);
 17 | 
 18 | 	mipp::reg_2 r1_lo = mipp::low <T>(r1);
 19 | 	mipp::reg_2 r1_hi = mipp::high<T>(r1);
 20 | 
 21 | 	mipp::reg r2 = mipp::combine<T>(r1_lo, r1_hi);
 22 | 
 23 | 	for (auto i = 0; i < mipp::N<T>(); i++)
 24 | 		REQUIRE(mipp::get<T>(r2, i) == inputs[i]); // every lane must equal the original element
 25 | }
 26 | 
 27 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // low-level mipp::reg test: compiled out when MIPP_NO or MIPP_SVE_LS is defined
 28 | TEST_CASE("Combine - mipp::reg", "[mipp::combine]")
 29 | {
 30 | #if defined(MIPP_64BIT)
 31 | 	SECTION("datatype = double") { test_reg_combine<double>(); }
 32 | #endif // MIPP_64BIT
 33 | 	SECTION("datatype = float") { test_reg_combine<float>(); }
 34 | 
 35 | #if defined(MIPP_64BIT)
 36 | 	SECTION("datatype = int64_t") { test_reg_combine<int64_t>(); }
 37 | #endif // MIPP_64BIT
 38 | 	SECTION("datatype = int32_t") { test_reg_combine<int32_t>(); }
 39 | #if defined(MIPP_BW) // 8/16-bit sections are built only when MIPP_BW is defined
 40 | 	SECTION("datatype = int16_t") { test_reg_combine<int16_t>(); }
 41 | 	SECTION("datatype = int8_t") { test_reg_combine<int8_t>(); }
 42 | #endif // MIPP_BW
 43 | }
 44 | #endif // !MIPP_NO && !MIPP_SVE_LS
 45 | 
 46 | template <typename T> // T: element type under test
 47 | void test_Reg_combine() // Round-trip check on mipp::Reg<T>: low()/high() halves re-joined with combine must give back the input.
 48 | {
 49 | 	T inputs[mipp::N<T>()];
 50 | 
 51 | 	std::iota(inputs, inputs + mipp::N<T>(), (T)0); // inputs = 0 .. N-1
 52 | 
 53 | 	mipp::Reg<T> r1 = inputs;
 54 | 
 55 | 	mipp::Reg_2<T> r1_lo = r1.low ();
 56 | 	mipp::Reg_2<T> r1_hi = r1.high();
 57 | 
 58 | 	mipp::Reg<T> r2 = mipp::combine(r1_lo, r1_hi);
 59 | 
 60 | 	for (auto i = 0; i < mipp::N<T>(); i++)
 61 | 		REQUIRE(r2[i] == inputs[i]); // every lane must equal the original element
 62 | }
 63 | 
 64 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // unlike most mipp::Reg tests in this suite, this one is also compiled out under MIPP_NO
 65 | TEST_CASE("Combine - mipp::Reg", "[mipp::combine]")
 66 | {
 67 | #if defined(MIPP_64BIT)
 68 | 	SECTION("datatype = double") { test_Reg_combine<double>(); }
 69 | #endif // MIPP_64BIT
 70 | 	SECTION("datatype = float") { test_Reg_combine<float>(); }
 71 | 
 72 | #if defined(MIPP_64BIT)
 73 | 	SECTION("datatype = int64_t") { test_Reg_combine<int64_t>(); }
 74 | #endif // MIPP_64BIT
 75 | 	SECTION("datatype = int32_t") { test_Reg_combine<int32_t>(); }
 76 | #if defined(MIPP_BW) // 8/16-bit sections are built only when MIPP_BW is defined
 77 | 	SECTION("datatype = int16_t") { test_Reg_combine<int16_t>(); }
 78 | 	SECTION("datatype = int8_t") { test_Reg_combine<int8_t>(); }
 79 | #endif // MIPP_BW
 80 | }
 81 | #endif // !MIPP_NO && !MIPP_SVE_LS
 82 | 
 83 | #include "../static_for.hpp"
 84 | 
 85 | template <typename T> // T: element type under test
 86 | struct sub_test_combine_bis // Functor passed to static_for: one func<LOOP_INDEX> instantiation per compile-time offset.
 87 | {
 88 | 	template<int LOOP_INDEX> static inline void func(const mipp::Reg<T> &r1, const mipp::Reg<T> &r2, const T *inputs) // inputs: the scalar array r1 and r2 were loaded from, read up to index N-1+LOOP_INDEX
 89 | 	{
 90 | 		mipp::Reg<T> r3 = mipp::combine<LOOP_INDEX>(r1, r2);
 91 | 
 92 | 		for (auto i = 0; i < mipp::N<T>(); i++)
 93 | 			REQUIRE(r3[i] == inputs[i + LOOP_INDEX]); // expected result: the N-element window of inputs starting at LOOP_INDEX
 94 | 	}
 95 | };
 96 | 
 97 | template <typename T> // T: element type under test
 98 | void test_Reg_combine_bis() // Checks the index-templated mipp::combine<I>(r1, r2) through sub_test_combine_bis.
 99 | {
100 | 	T inputs[2 * mipp::N<T>()]; // two registers' worth of consecutive values
101 | 
102 | 	std::iota(inputs, inputs + 2 * mipp::N<T>(), (T)0); // inputs = 0 .. 2N-1
103 | 
104 | 	mipp::Reg<T> r1 = inputs; // first N elements
105 | 	mipp::Reg<T> r2 = inputs + mipp::N<T>(); // next N elements
106 | 
107 | 	static_for<mipp::N<T>(), sub_test_combine_bis<T>>(r1, r2, inputs); // NOTE(review): presumably calls func<0> .. func<N-1>; static_for is defined in ../static_for.hpp, confirm there
108 | }
109 | 
110 | #if defined(MIPP_NEON) || defined(MIPP_SSE) || (defined(MIPP_AVX) && MIPP_INSTR_VERSION >= 2) || defined(MIPP_AVX512F) // built only for NEON, SSE, AVX with MIPP_INSTR_VERSION >= 2, or AVX512F
111 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS)
112 | TEST_CASE("Combine (bis) - mipp::Reg", "[mipp::combine_bis]")
113 | {
114 | #if defined(MIPP_64BIT)
115 | 	SECTION("datatype = double") { test_Reg_combine_bis<double>(); }
116 | #endif // MIPP_64BIT
117 | 	SECTION("datatype = float") { test_Reg_combine_bis<float>(); }
118 | 
119 | #if defined(MIPP_64BIT)
120 | 	SECTION("datatype = int64_t") { test_Reg_combine_bis<int64_t>(); }
121 | #endif // MIPP_64BIT
122 | 	SECTION("datatype = int32_t") { test_Reg_combine_bis<int32_t>(); }
123 | #if defined(MIPP_BW)
124 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) // on MIPP_SSE both 8/16-bit sections need MIPP_INSTR_VERSION >= 31
125 | 	SECTION("datatype = int16_t") { test_Reg_combine_bis<int16_t>(); }
126 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI)) // on MIPP_AVX512 the int8_t section additionally needs MIPP_AVX512VBMI
127 | 	SECTION("datatype = int8_t") { test_Reg_combine_bis<int8_t>(); }
128 | #endif // !MIPP_AVX512 || MIPP_AVX512VBMI
129 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 31
130 | #endif // MIPP_BW
131 | }
132 | #endif // !MIPP_NO && !MIPP_SVE_LS
133 | #endif // NEON || SSE || AVX (version >= 2) || AVX512F
134 | 


--------------------------------------------------------------------------------
/tests/src/memory_operations/compress.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <array>
  8 | #include <catch.hpp>
  9 | 
 10 | template <typename T>
 11 | void test_reg_compress()
 12 | {
 13 | 	// Raw-register API: compare mipp::compress<T> against a scalar reference over 1000 pseudo-random masks.
 14 | 	constexpr int N = mipp::N<T>();
 15 | 	T inputs1[N];
 16 | 	T expected[N];
 17 | 	bool mask1[N];
 18 | 
 19 | 	std::iota(inputs1, inputs1 + N, (T)1); // lanes hold 1, 2, ..., N
 20 | 	mipp::reg r1 = mipp::load<T>(inputs1);
 21 | 	mipp::reg r2 = mipp::set0<T>();
 22 | 
 23 | 	std::mt19937 rng; // default-constructed, so the mask sequence is the same on every run
 24 | 	for (int trial = 0; trial < 1000; ++trial)
 25 | 	{
 26 | 		// Draw a mask and build the reference: kept values packed to the front, zeros behind
 27 | 		std::fill_n(expected, N, 0);
 28 | 		int n_kept = 0;
 29 | 		for (int lane = 0; lane < N; ++lane)
 30 | 		{
 31 | 			mask1[lane] = (rng() & 1) == 0; // a clear low bit selects the lane
 32 | 			if (!mask1[lane])
 33 | 				continue;
 34 | 			expected[n_kept++] = lane + (T)1;
 35 | 		}
 36 | 
 37 | 		mipp::msk mask = mipp::set<N>(mask1);
 38 | 		r2 = mipp::compress<T>(r1, mask);
 39 | 
 40 | 		// Every lane is compared, including the zero-filled tail
 41 | 		for (int i = 0; i < N; ++i)
 42 | 			REQUIRE(mipp::get<T>(r2, i) == expected[i]);
 43 | 	}
 44 | }
 45 | 
 46 | #if defined(MIPP_STATIC_LIB) && !defined(MIPP_NO) // only built when MIPP_STATIC_LIB is defined and MIPP_NO is not
 47 | TEST_CASE("Compress - mipp::reg", "[mipp::compress]")
 48 | { // One Catch2 section per element type, each instantiating test_reg_compress<T>; empty body if the gate below fails.
 49 | #if (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) || defined(MIPP_AVX512) || defined(MIPP_NEON) || (defined(MIPP_AVX2) && defined(MIPP_BMI2)) // instruction-set gate
 50 | #if defined(MIPP_64BIT)
 51 | 	SECTION("datatype = double") { test_reg_compress<double>(); }
 52 | #endif // MIPP_64BIT
 53 | 	SECTION("datatype = float") { test_reg_compress<float>(); }
 54 | #if defined(MIPP_64BIT)
 55 | 	SECTION("datatype = int64_t") { test_reg_compress<int64_t>(); }
 56 | #endif // MIPP_64BIT
 57 | 	SECTION("datatype = int32_t") { test_reg_compress<int32_t>(); }
 58 | #if defined(MIPP_BW) && !defined(MIPP_AVX) // 8/16-bit sections are skipped when MIPP_AVX is defined
 59 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI2)) // with AVX-512, needs MIPP_AVX512VBMI2
 60 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && defined(MIPP_BMI2)) // with SSE, int16_t additionally needs MIPP_BMI2
 61 | 	SECTION("datatype = int16_t") { test_reg_compress<int16_t>(); }
 62 | #endif // int16_t SSE/BMI2 guard
 63 | 	SECTION("datatype = int8_t") { test_reg_compress<int8_t>(); }
 64 | #endif // AVX-512 VBMI2 guard
 65 | #endif // MIPP_BW && !MIPP_AVX
 66 | #endif // instruction-set gate
 67 | }
 68 | #endif // MIPP_STATIC_LIB && !MIPP_NO
 69 | 
 70 | template <typename T>
 71 | void test_Reg_compress()
 72 | {
 73 | 	// Object API: compare mipp::compress(Reg, Msk) against a scalar reference over 1000 pseudo-random masks.
 74 | 	constexpr int N = mipp::N<T>();
 75 | 	T inputs1[N];
 76 | 	T expected[N];
 77 | 	bool mask1[N];
 78 | 
 79 | 	std::iota(inputs1, inputs1 + N, (T)1);
 80 | 	mipp::Reg<T> r1 = inputs1;
 81 | 	mipp::Reg<T> r2 = (T)0;
 82 | 
 83 | 	std::mt19937 rng;
 84 | 	for (int trial = 0; trial < 1000; ++trial)
 85 | 	{
 86 | 		// Reference result: kept values first (in lane order), zero-filled afterwards
 87 | 		std::fill_n(expected, N, 0);
 88 | 		T *next_slot = expected;
 89 | 		for (int lane = 0; lane < N; ++lane)
 90 | 		{
 91 | 			const bool keep = !(rng() & 1); // one generator draw per lane
 92 | 			mask1[lane] = keep;
 93 | 			if (keep)
 94 | 				*next_slot++ = lane + (T)1;
 95 | 		}
 96 | 
 97 | 		mipp::Msk<N> mask = mask1;
 98 | 
 99 | 		r2 = mipp::compress(r1, mask);
100 | 
101 | 		for (int i = 0; i < N; ++i)
102 | 			REQUIRE(r2[i] == expected[i]);
103 | 	}
104 | }
105 | 
106 | #if defined(MIPP_STATIC_LIB) && !defined(MIPP_NO) // only built when MIPP_STATIC_LIB is defined and MIPP_NO is not
107 | TEST_CASE("Compress - mipp::Reg", "[mipp::compress]")
108 | { // One Catch2 section per element type, each instantiating test_Reg_compress<T>; empty body if the gate below fails.
109 | #if (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) || defined(MIPP_AVX512) || defined(MIPP_NEON) || (defined(MIPP_AVX2) && defined(MIPP_BMI2)) // instruction-set gate
110 | #if defined(MIPP_64BIT)
111 | 	SECTION("datatype = double") { test_Reg_compress<double>(); }
112 | #endif // MIPP_64BIT
113 | 	SECTION("datatype = float") { test_Reg_compress<float>(); }
114 | #if defined(MIPP_64BIT)
115 | 	SECTION("datatype = int64_t") { test_Reg_compress<int64_t>(); }
116 | #endif // MIPP_64BIT
117 | 	SECTION("datatype = int32_t") { test_Reg_compress<int32_t>(); }
118 | #if defined(MIPP_BW) && !defined(MIPP_AVX) // 8/16-bit sections are skipped when MIPP_AVX is defined
119 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI2)) // with AVX-512, needs MIPP_AVX512VBMI2
120 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && defined(MIPP_BMI2)) // with SSE, int16_t additionally needs MIPP_BMI2
121 | 	SECTION("datatype = int16_t") { test_Reg_compress<int16_t>(); }
122 | #endif // int16_t SSE/BMI2 guard
123 | 	SECTION("datatype = int8_t") { test_Reg_compress<int8_t>(); }
124 | #endif // AVX-512 VBMI2 guard
125 | #endif // MIPP_BW && !MIPP_AVX
126 | #endif // instruction-set gate
127 | }
128 | #endif // MIPP_STATIC_LIB && !MIPP_NO
129 | 


--------------------------------------------------------------------------------
/tests/src/memory_operations/deinterleave.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename T>
 10 | void test_reg_deinterleave()
 11 | {
 12 | 	// Raw-register API: deinterleave(r1, r2) must split the sequence r1 followed by r2 into its even- and odd-indexed elements.
 13 | 	constexpr int N = mipp::N<T>();
 14 | 	T inputs1[N], inputs2[N];
 15 | 	std::iota(inputs1, inputs1 + N, (T)0);
 16 | 	std::iota(inputs2, inputs2 + N, (T)0);
 17 | 	std::mt19937 rng;
 18 | 	std::shuffle(inputs1, inputs1 + N, rng); // inputs1 is shuffled first, then inputs2, from the same generator
 19 | 	std::shuffle(inputs2, inputs2 + N, rng);
 20 | 
 21 | 	T inputs[2 * N]; // reference sequence: inputs1 followed by inputs2
 22 | 	std::copy(inputs2, inputs2 + N, std::copy(inputs1, inputs1 + N, inputs));
 23 | 	mipp::reg r1 = mipp::load<T>(inputs1);
 24 | 	mipp::reg r2 = mipp::load<T>(inputs2);
 25 | 
 26 | 	mipp::regx2 ri = mipp::deinterleave<T>(r1, r2);
 27 | 
 28 | 	for (int i = 0; i < N; ++i) // first output register: elements 0, 2, 4, ...
 29 | 		REQUIRE(mipp::get<T>(ri.val[0], i) == inputs[2 * i +0]);
 30 | 
 31 | 	for (int i = 0; i < N; ++i) // second output register: elements 1, 3, 5, ...
 32 | 		REQUIRE(mipp::get<T>(ri.val[1], i) == inputs[2 * i +1]);
 33 | }
 34 | 
 35 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // compiled out when MIPP_NO or MIPP_SVE_LS is defined
 36 | TEST_CASE("Deinterleave - mipp::reg", "[mipp::deinterleave]")
 37 | { // One Catch2 section per element type, each instantiating test_reg_deinterleave<T>.
 38 | #if defined(MIPP_64BIT)
 39 | 	SECTION("datatype = double") { test_reg_deinterleave<double>(); }
 40 | #endif // MIPP_64BIT
 41 | 	SECTION("datatype = float") { test_reg_deinterleave<float>(); }
 42 | 
 43 | #if defined(MIPP_64BIT)
 44 | 	SECTION("datatype = int64_t") { test_reg_deinterleave<int64_t>(); }
 45 | #endif // MIPP_64BIT
 46 | 	SECTION("datatype = int32_t") { test_reg_deinterleave<int32_t>(); }
 47 | #if defined(MIPP_BW)
 48 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) // with SSE, needs MIPP_INSTR_VERSION >= 31
 49 | 	SECTION("datatype = int16_t") { test_reg_deinterleave<int16_t>(); }
 50 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI)) // nested: int8_t also needs the int16_t guard above
 51 | 	SECTION("datatype = int8_t") { test_reg_deinterleave<int8_t>(); }
 52 | #endif // int8_t guard (AVX-512 requires MIPP_AVX512VBMI)
 53 | #endif // int16_t guard (SSE version)
 54 | #endif // MIPP_BW
 55 | }
 56 | #endif // !MIPP_NO && !MIPP_SVE_LS
 57 | 
 58 | template <typename T>
 59 | void test_Reg_deinterleave()
 60 | {
 61 | 	// Object API: mipp::deinterleave(Reg, Reg) must split the sequence r1 followed by r2 into even- and odd-indexed elements.
 62 | 	constexpr int N = mipp::N<T>();
 63 | 	T inputs1[N], inputs2[N];
 64 | 	std::mt19937 rng;
 65 | 	std::iota(inputs1, inputs1 + N, (T)0); std::shuffle(inputs1, inputs1 + N, rng);
 66 | 	std::iota(inputs2, inputs2 + N, (T)0); std::shuffle(inputs2, inputs2 + N, rng);
 67 | 
 68 | 	T inputs[2*N]; // inputs1 fills the first half, inputs2 the second
 69 | 	for (int k = 0; k < N; ++k)
 70 | 	{ inputs[k] = inputs1[k]; inputs[N + k] = inputs2[k]; }
 71 | 
 72 | 	mipp::Reg<T> r1 = inputs1;
 73 | 	mipp::Reg<T> r2 = inputs2;
 74 | 
 75 | 	mipp::Regx2<T> ri = mipp::deinterleave(r1, r2);
 76 | 
 77 | 	for (int i = 0; i < N; ++i) // even-indexed elements
 78 | 		REQUIRE(ri[0][i] == inputs[2 * i +0]);
 79 | 
 80 | 	for (int i = 0; i < N; ++i) // odd-indexed elements
 81 | 		REQUIRE(ri[1][i] == inputs[2 * i +1]);
 82 | }
 83 | 
 84 | #if !defined(MIPP_SVE_LS) // compiled out only when MIPP_SVE_LS is defined
 85 | TEST_CASE("Deinterleave - mipp::Reg", "[mipp::deinterleave]")
 86 | { // One Catch2 section per element type, each instantiating test_Reg_deinterleave<T>.
 87 | #if defined(MIPP_64BIT)
 88 | 	SECTION("datatype = double") { test_Reg_deinterleave<double>(); }
 89 | #endif // MIPP_64BIT
 90 | 	SECTION("datatype = float") { test_Reg_deinterleave<float>(); }
 91 | 
 92 | #if defined(MIPP_64BIT)
 93 | 	SECTION("datatype = int64_t") { test_Reg_deinterleave<int64_t>(); }
 94 | #endif // MIPP_64BIT
 95 | 	SECTION("datatype = int32_t") { test_Reg_deinterleave<int32_t>(); }
 96 | #if defined(MIPP_BW)
 97 | #if !defined(MIPP_SSE) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) // with SSE, needs MIPP_INSTR_VERSION >= 31
 98 | 	SECTION("datatype = int16_t") { test_Reg_deinterleave<int16_t>(); }
 99 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI)) // nested: int8_t also needs the int16_t guard above
100 | 	SECTION("datatype = int8_t") { test_Reg_deinterleave<int8_t>(); }
101 | #endif // int8_t guard (AVX-512 requires MIPP_AVX512VBMI)
102 | #endif // int16_t guard (SSE version)
103 | #endif // MIPP_BW
104 | }
105 | #endif // !MIPP_SVE_LS
106 | 


--------------------------------------------------------------------------------
/tests/src/memory_operations/gather.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename TD, typename TI>
 10 | void test_reg_gather()
 11 | {
 12 | 	constexpr int N = mipp::N<TD>();
 13 | 	constexpr int n_elmts = N*10; // data and index tables are ten times the lane count
 14 | 	TD inputs [n_elmts];
 15 | 	TI indexes[n_elmts];
 16 | 	std::iota(inputs,  inputs  + n_elmts, (TD)0);
 17 | 	std::iota(indexes, indexes + n_elmts, (TI)0);
 18 | 	std::mt19937 rng;
 19 | 	for (int round = 0; round < 20; ++round) // 20 successive shuffles of the index table
 20 | 	{
 21 | 		std::shuffle(indexes, indexes + n_elmts, rng);
 22 | 		// Raw-register gather through the loaded indexes, checked lane by lane against direct lookups
 23 | 		mipp::reg idx = mipp::loadu<TI>(indexes);
 24 | 		mipp::reg r = mipp::gather<TD,TI>(inputs, idx);
 25 | 
 26 | 		for (int i = 0; i < N; ++i)
 27 | 			REQUIRE(+mipp::get<TD>(r, i) == +inputs[indexes[i]]);
 28 | 	}
 29 | }
 30 | 
 31 | #ifndef MIPP_NO // raw-register gather test is compiled out when MIPP_NO is defined
 32 | TEST_CASE("Gather - mipp::reg", "[mipp::gather]")
 33 | { // One Catch2 section per (data type, index type) pair, each instantiating test_reg_gather<TD,TI>.
 34 | #if defined(MIPP_64BIT)
 35 | 	SECTION("datatype = double") { test_reg_gather<double,int64_t>(); }
 36 | #endif // MIPP_64BIT
 37 | 	SECTION("datatype = float") { test_reg_gather<float,int32_t>(); }
 38 | #if defined(MIPP_64BIT)
 39 | 	SECTION("datatype = int64_t") { test_reg_gather<int64_t,int64_t>(); }
 40 | #endif // MIPP_64BIT
 41 | 	SECTION("datatype = int32_t") { test_reg_gather<int32_t,int32_t>(); }
 42 | #if defined(MIPP_BW) // only int16_t is exercised here; the int8_t section below is disabled
 43 | 	SECTION("datatype = int16_t") { test_reg_gather<int16_t,int16_t>(); }
 44 | 	//SECTION("datatype = int8_t") { test_reg_gather<int8_t,int8_t>(); }
 45 | #endif // MIPP_BW
 46 | }
 47 | #endif // !MIPP_NO
 48 | 
 49 | template <typename TD, typename TI>
 50 | void test_Reg_gather()
 51 | {
 52 | 	// Object API: mipp::gather<TD>(inputs, idx) must return inputs[indexes[i]] in lane i.
 53 | 	constexpr int N = mipp::N<TD>();
 54 | 	TD inputs [N*10];
 55 | 	TI indexes[N*10];
 56 | 	std::iota(inputs,  inputs  + N*10, (TD)0);
 57 | 	std::iota(indexes, indexes + N*10, (TI)0);
 58 | 	std::mt19937 rng;
 59 | 	int remaining = 20; // number of index shuffles still to test
 60 | 	while (remaining-- > 0)
 61 | 	{
 62 | 		std::shuffle(indexes, indexes + N*10, rng);
 63 | 		mipp::Reg<TI> idx = mipp::oloadu(indexes);
 64 | 		mipp::Reg<TD> r = mipp::gather<TD>(inputs, idx);
 65 | 
 66 | 		for (int i = 0; i < N; ++i)
 67 | 			REQUIRE(+r[i] == +inputs[indexes[i]]);
 68 | 	}
 69 | }
 70 | 
 71 | TEST_CASE("Gather - mipp::Reg", "[mipp::gather]") // no enclosing #if around this test case
 72 | { // One Catch2 section per (data type, index type) pair, each instantiating test_Reg_gather<TD,TI>.
 73 | #if defined(MIPP_64BIT)
 74 | 	SECTION("datatype = double") { test_Reg_gather<double,int64_t>(); }
 75 | #endif // MIPP_64BIT
 76 | 	SECTION("datatype = float") { test_Reg_gather<float,int32_t>(); }
 77 | #if defined(MIPP_64BIT)
 78 | 	SECTION("datatype = int64_t") { test_Reg_gather<int64_t,int64_t>(); }
 79 | #endif // MIPP_64BIT
 80 | 	SECTION("datatype = int32_t") { test_Reg_gather<int32_t,int32_t>(); }
 81 | #if defined(MIPP_BW) // only int16_t is exercised here; the int8_t section below is disabled
 82 | 	SECTION("datatype = int16_t") { test_Reg_gather<int16_t,int16_t>(); }
 83 | 	//SECTION("datatype = int8_t") { test_Reg_gather<int8_t,int8_t>(); }
 84 | #endif // MIPP_BW
 85 | }
 86 | 
 87 | template <typename TD, typename TI>
 88 | void test_reg_maskzgat()
 89 | {
 90 | 	// Raw-register masked gather: enabled lanes must equal inputs[indexes[i]], disabled lanes must compare equal to zero.
 91 | 	constexpr int N = mipp::N<TD>();
 92 | 	constexpr int n_elmts = N*10;
 93 | 	TD inputs [n_elmts];
 94 | 	TI indexes[n_elmts];
 95 | 	std::iota(inputs,  inputs  + n_elmts, (TD)0);
 96 | 	std::iota(indexes, indexes + n_elmts, (TI)0);
 97 | 	bool mask[N]; // N/2 lanes enabled, the rest disabled; positions are reshuffled every round
 98 | 	for (int lane = 0; lane < N; ++lane) mask[lane] = lane < N/2;
 99 | 
100 | 	std::mt19937 rng;
101 | 	for (int round = 0; round < 20; ++round)
102 | 	{
103 | 		std::shuffle(indexes, indexes + n_elmts, rng);
104 | 		std::shuffle(mask,    mask    + N,       rng);
105 | 
106 | 		mipp::msk m = mipp::set<N>(mask);
107 | 		mipp::reg idx = mipp::loadu<TI>(indexes);
108 | 		mipp::reg r = mipp::maskzgat<TD,TI>(m, inputs, idx);
109 | 
110 | 		for (int i = 0; i < N; ++i)
111 | 		{
112 | 			if (!mask[i])
113 | 				REQUIRE(+mipp::get<TD>(r, i) == (TD)0);
114 | 			else
115 | 				REQUIRE(+mipp::get<TD>(r, i) == +inputs[indexes[i]]);
116 | 		}
117 | 	}
118 | }
119 | 
120 | #ifndef MIPP_NO // raw-register masked gather test is compiled out when MIPP_NO is defined
121 | TEST_CASE("Masked gather - mipp::reg", "[mipp::maskzgat]")
122 | { // One Catch2 section per (data type, index type) pair, each instantiating test_reg_maskzgat<TD,TI>.
123 | #if defined(MIPP_64BIT)
124 | 	SECTION("datatype = double") { test_reg_maskzgat<double,int64_t>(); }
125 | #endif // MIPP_64BIT
126 | 	SECTION("datatype = float") { test_reg_maskzgat<float,int32_t>(); }
127 | #if defined(MIPP_64BIT)
128 | 	SECTION("datatype = int64_t") { test_reg_maskzgat<int64_t,int64_t>(); }
129 | #endif // MIPP_64BIT
130 | 	SECTION("datatype = int32_t") { test_reg_maskzgat<int32_t,int32_t>(); }
131 | #if defined(MIPP_BW) // only int16_t is exercised here; the int8_t section below is disabled
132 | 	SECTION("datatype = int16_t") { test_reg_maskzgat<int16_t,int16_t>(); }
133 | 	//SECTION("datatype = int8_t") { test_reg_maskzgat<int8_t,int8_t>(); }
134 | #endif // MIPP_BW
135 | }
136 | #endif // !MIPP_NO
137 | 
138 | template <typename TD, typename TI>
139 | void test_Reg_maskzgat()
140 | {
141 | 	// Object-API masked gather: enabled lanes must equal inputs[indexes[i]], disabled lanes must compare equal to zero.
142 | 	constexpr int N = mipp::N<TD>();
143 | 	TD inputs [N*10];
144 | 	TI indexes[N*10];
145 | 	std::iota(inputs,  inputs  + N*10, (TD)0);
146 | 	std::iota(indexes, indexes + N*10, (TI)0);
147 | 	bool mask[N]; // first N/2 entries true, remainder false, before any shuffling
148 | 	std::fill_n(mask,       N/2,     true );
149 | 	std::fill_n(mask + N/2, N - N/2, false);
150 | 
151 | 	std::mt19937 rng;
152 | 	for (int round = 0; round < 20; ++round)
153 | 	{
154 | 		std::shuffle(indexes, indexes + N*10, rng);
155 | 		std::shuffle(mask,    mask    + N,    rng);
156 | 
157 | 		mipp::Msk<N> m = mipp::oset<N>(mask);
158 | 		mipp::Reg<TI> idx = mipp::oloadu(indexes);
159 | 		mipp::Reg<TD> r = mipp::maskzgat(m, inputs, idx);
160 | 
161 | 		for (int i = 0; i < N; ++i)
162 | 		{
163 | 			if (!mask[i])
164 | 				REQUIRE(+r[i] == (TD)0);
165 | 			else
166 | 				REQUIRE(+r[i] == +inputs[indexes[i]]);
167 | 		}
168 | 	}
169 | }
170 | 
171 | TEST_CASE("Masked gather - mipp::Reg", "[mipp::maskzgat]") // no enclosing #if around this test case
172 | { // One Catch2 section per (data type, index type) pair, each instantiating test_Reg_maskzgat<TD,TI>.
173 | #if defined(MIPP_64BIT)
174 | 	SECTION("datatype = double") { test_Reg_maskzgat<double,int64_t>(); }
175 | #endif // MIPP_64BIT
176 | 	SECTION("datatype = float") { test_Reg_maskzgat<float,int32_t>(); }
177 | #if defined(MIPP_64BIT)
178 | 	SECTION("datatype = int64_t") { test_Reg_maskzgat<int64_t,int64_t>(); }
179 | #endif // MIPP_64BIT
180 | 	SECTION("datatype = int32_t") { test_Reg_maskzgat<int32_t,int32_t>(); }
181 | #if defined(MIPP_BW) // only int16_t is exercised here; the int8_t section below is disabled
182 | 	SECTION("datatype = int16_t") { test_Reg_maskzgat<int16_t,int16_t>(); }
183 | 	//SECTION("datatype = int8_t") { test_Reg_maskzgat<int8_t,int8_t>(); }
184 | #endif // MIPP_BW
185 | }


--------------------------------------------------------------------------------
/tests/src/memory_operations/get.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | template <typename T>
 10 | void test_reg_get()
 11 | {
 12 | 	constexpr int n_lanes = mipp::N<T>();
 13 | 	T inputs[n_lanes];
 14 | 	std::iota(inputs, inputs + n_lanes, (T)0); // element i holds the value i
 15 | 	mipp::reg r = mipp::load<T>(inputs);
 16 | 	// Reading lane i of the raw register through mipp::get<T> must give back i
 17 | 	for (int i = 0; i < n_lanes; ++i)
 18 | 		REQUIRE(mipp::get<T>(r, i) == (T)i);
 19 | }
 20 | 
 21 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // compiled out when MIPP_NO or MIPP_SVE_LS is defined
 22 | TEST_CASE("Get - mipp::reg", "[mipp::get]")
 23 | { // One Catch2 section per element type, each instantiating test_reg_get<T>.
 24 | #if defined(MIPP_64BIT)
 25 | 	SECTION("datatype = double") { test_reg_get<double>(); }
 26 | #endif // MIPP_64BIT
 27 | 	SECTION("datatype = float") { test_reg_get<float>(); }
 28 | 
 29 | #if defined(MIPP_64BIT)
 30 | 	SECTION("datatype = int64_t") { test_reg_get<int64_t>(); }
 31 | #endif // MIPP_64BIT
 32 | 	SECTION("datatype = int32_t") { test_reg_get<int32_t>(); }
 33 | #if defined(MIPP_BW) // the 16-bit and 8-bit sections are gated on MIPP_BW
 34 | 	SECTION("datatype = int16_t") { test_reg_get<int16_t>(); }
 35 | 	SECTION("datatype = int8_t") { test_reg_get<int8_t>(); }
 36 | #endif // MIPP_BW
 37 | }
 38 | #endif // !MIPP_NO && !MIPP_SVE_LS
 39 | 
 40 | template <typename T>
 41 | void test_Reg_get()
 42 | {
 43 | 	constexpr int n_lanes = mipp::N<T>();
 44 | 	T inputs[n_lanes];
 45 | 	std::iota(&inputs[0], &inputs[0] + n_lanes, (T)0); // element i holds the value i
 46 | 	mipp::Reg<T> r = mipp::oload<T>(inputs);
 47 | 	// mipp::get on the Reg object must give back i for lane i
 48 | 	for (int i = 0; i < n_lanes; ++i)
 49 | 		REQUIRE(mipp::get(r, i) == (T)i);
 50 | }
 51 | 
 52 | #if !defined(MIPP_SVE_LS) // compiled out only when MIPP_SVE_LS is defined
 53 | TEST_CASE("Get - mipp::Reg", "[mipp::get]")
 54 | { // One Catch2 section per element type, each instantiating test_Reg_get<T>.
 55 | #if defined(MIPP_64BIT)
 56 | 	SECTION("datatype = double") { test_Reg_get<double>(); }
 57 | #endif // MIPP_64BIT
 58 | 	SECTION("datatype = float") { test_Reg_get<float>(); }
 59 | 
 60 | #if defined(MIPP_64BIT)
 61 | 	SECTION("datatype = int64_t") { test_Reg_get<int64_t>(); }
 62 | #endif // MIPP_64BIT
 63 | 	SECTION("datatype = int32_t") { test_Reg_get<int32_t>(); }
 64 | #if defined(MIPP_BW) // the 16-bit and 8-bit sections are gated on MIPP_BW
 65 | 	SECTION("datatype = int16_t") { test_Reg_get<int16_t>(); }
 66 | 	SECTION("datatype = int8_t") { test_Reg_get<int8_t>(); }
 67 | #endif // MIPP_BW
 68 | }
 69 | #endif // !MIPP_SVE_LS
 70 | 
 71 | template <typename T>
 72 | void test_reg_2_get()
 73 | {
 74 | 	// Raw half registers: mipp::get<T> on low<T>(r) and high<T>(r) must return the matching halves of 0..N-1.
 75 | 	constexpr int n_lanes = mipp::N<T>();
 76 | 	T inputs[n_lanes];
 77 | 	std::iota(inputs, inputs + n_lanes, (T)0);
 78 | 	mipp::reg r = mipp::load<T>(inputs);
 79 | 	mipp::reg_2 rl = mipp::low<T>(r);
 80 | 	mipp::reg_2 rh = mipp::high<T>(r);
 81 | 
 82 | 	for (int i = 0; i < n_lanes/2; ++i) // low half holds 0 .. N/2-1
 83 | 		REQUIRE(mipp::get<T>(rl, i) == (T)i);
 84 | 
 85 | 	for (int i = 0; i < n_lanes/2; ++i) // high half holds N/2 .. N-1
 86 | 		REQUIRE(mipp::get<T>(rh, i) == (T)(i+mipp::N<T>()/2));
 87 | }
 88 | 
 89 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // compiled out when MIPP_NO or MIPP_SVE_LS is defined
 90 | TEST_CASE("Get - mipp::reg_2", "[mipp::get]")
 91 | { // One Catch2 section per element type, each instantiating test_reg_2_get<T>.
 92 | #if defined(MIPP_64BIT)
 93 | 	SECTION("datatype = double") { test_reg_2_get<double>(); }
 94 | #endif // MIPP_64BIT
 95 | 	SECTION("datatype = float") { test_reg_2_get<float>(); }
 96 | 
 97 | #if defined(MIPP_64BIT)
 98 | 	SECTION("datatype = int64_t") { test_reg_2_get<int64_t>(); }
 99 | #endif // MIPP_64BIT
100 | 	SECTION("datatype = int32_t") { test_reg_2_get<int32_t>(); }
101 | #if defined(MIPP_BW) // the 16-bit and 8-bit sections are gated on MIPP_BW
102 | 	SECTION("datatype = int16_t") { test_reg_2_get<int16_t>(); }
103 | 	SECTION("datatype = int8_t") { test_reg_2_get<int8_t>(); }
104 | #endif // MIPP_BW
105 | }
106 | #endif // !MIPP_NO && !MIPP_SVE_LS
107 | 
108 | template <typename T>
109 | void test_Reg_2_get()
110 | {
111 | 	// Object half registers: mipp::get on low<T>(r) and high<T>(r) must return the matching halves of 0..N-1.
112 | 	constexpr int n_lanes = mipp::N<T>();
113 | 	T inputs[n_lanes];
114 | 	std::iota(inputs, inputs + n_lanes, (T)0);
115 | 	mipp::Reg<T> r = mipp::oload<T>(inputs);
116 | 	mipp::Reg_2<T> rl = mipp::low<T>(r);
117 | 	mipp::Reg_2<T> rh = mipp::high<T>(r);
118 | 
119 | 	for (int i = 0; i < n_lanes/2; ++i) // low half holds 0 .. N/2-1
120 | 		REQUIRE(mipp::get(rl, i) == (T)i);
121 | 
122 | 	for (int i = 0; i < n_lanes/2; ++i) // high half holds N/2 .. N-1
123 | 		REQUIRE(mipp::get(rh, i) == (T)(i+mipp::N<T>()/2));
124 | }
125 | 
126 | #if !defined(MIPP_SVE_LS) // compiled out only when MIPP_SVE_LS is defined
127 | TEST_CASE("Get - mipp::Reg_2", "[mipp::get]")
128 | { // One Catch2 section per element type, each instantiating test_Reg_2_get<T>.
129 | #if defined(MIPP_64BIT)
130 | 	SECTION("datatype = double") { test_Reg_2_get<double>(); }
131 | #endif // MIPP_64BIT
132 | 	SECTION("datatype = float") { test_Reg_2_get<float>(); }
133 | 
134 | #if defined(MIPP_64BIT)
135 | 	SECTION("datatype = int64_t") { test_Reg_2_get<int64_t>(); }
136 | #endif // MIPP_64BIT
137 | 	SECTION("datatype = int32_t") { test_Reg_2_get<int32_t>(); }
138 | #if defined(MIPP_BW) // the 16-bit and 8-bit sections are gated on MIPP_BW
139 | 	SECTION("datatype = int16_t") { test_Reg_2_get<int16_t>(); }
140 | 	SECTION("datatype = int8_t") { test_Reg_2_get<int8_t>(); }
141 | #endif // MIPP_BW
142 | }
143 | #endif // !MIPP_SVE_LS
144 | 
145 | template <typename T>
146 | void test_msk_get()
147 | {
148 | 	// Raw mask: every lane read back with mipp::get<N> must equal the bool it was built from.
149 | 	constexpr int N = mipp::N<T>();
150 | 	bool mask[N];
151 | 	for (int lane = 0; lane < N; ++lane)
152 | 		mask[lane] = lane < N/2; // first N/2 entries true, the rest false
153 | 
154 | 	std::mt19937 rng;
155 | 	std::shuffle(mask, mask + N, rng);
156 | 
157 | 	mipp::msk m = mipp::set<N>(mask);
158 | 
159 | 	for (int i = 0; i < N; ++i)
160 | 		REQUIRE(mipp::get<N>(m, i) == mask[i]);
161 | }
162 | 
163 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // compiled out when MIPP_NO or MIPP_SVE_LS is defined
164 | TEST_CASE("Get - mipp::msk", "[mipp::get]")
165 | { // One Catch2 section per element type; the type only selects the lane count used by test_msk_get<T>.
166 | #if defined(MIPP_64BIT)
167 | 	SECTION("datatype = double") { test_msk_get<double>(); }
168 | #endif // MIPP_64BIT
169 | 	SECTION("datatype = float") { test_msk_get<float>(); }
170 | 
171 | #if defined(MIPP_64BIT)
172 | 	SECTION("datatype = int64_t") { test_msk_get<int64_t>(); }
173 | #endif // MIPP_64BIT
174 | 	SECTION("datatype = int32_t") { test_msk_get<int32_t>(); }
175 | #if defined(MIPP_BW) // the 16-bit and 8-bit sections are gated on MIPP_BW
176 | 	SECTION("datatype = int16_t") { test_msk_get<int16_t>(); }
177 | 	SECTION("datatype = int8_t") { test_msk_get<int8_t>(); }
178 | #endif // MIPP_BW
179 | }
180 | #endif // !MIPP_NO && !MIPP_SVE_LS
181 | 
182 | template <typename T>
183 | void test_Msk_get()
184 | {
185 | 	// Object mask: every lane read back with mipp::get must equal the bool it was built from.
186 | 	constexpr int N = mipp::N<T>();
187 | 	bool mask[N];
188 | 	std::fill_n(mask,       N/2,     true );
189 | 	std::fill_n(mask + N/2, N - N/2, false);
190 | 
191 | 	std::mt19937 rng;
192 | 	std::shuffle(&mask[0], &mask[0] + N, rng);
193 | 
194 | 	mipp::Msk<N> m = mipp::oset<N>(mask);
195 | 
196 | 	for (int i = 0; i < N; ++i)
197 | 		REQUIRE(mipp::get(m, i) == mask[i]);
198 | }
199 | 
200 | #if !defined(MIPP_SVE_LS) // compiled out only when MIPP_SVE_LS is defined
201 | TEST_CASE("Get - mipp::Msk", "[mipp::get]")
202 | { // One Catch2 section per element type; the type only selects the lane count used by test_Msk_get<T>.
203 | #if defined(MIPP_64BIT)
204 | 	SECTION("datatype = double") { test_Msk_get<double>(); }
205 | #endif // MIPP_64BIT
206 | 	SECTION("datatype = float") { test_Msk_get<float>(); }
207 | 
208 | #if defined(MIPP_64BIT)
209 | 	SECTION("datatype = int64_t") { test_Msk_get<int64_t>(); }
210 | #endif // MIPP_64BIT
211 | 	SECTION("datatype = int32_t") { test_Msk_get<int32_t>(); }
212 | #if defined(MIPP_BW) // the 16-bit and 8-bit sections are gated on MIPP_BW
213 | 	SECTION("datatype = int16_t") { test_Msk_get<int16_t>(); }
214 | 	SECTION("datatype = int8_t") { test_Msk_get<int8_t>(); }
215 | #endif // MIPP_BW
216 | }
217 | #endif // !MIPP_SVE_LS
218 | 


--------------------------------------------------------------------------------
/tests/src/memory_operations/high.cpp:
--------------------------------------------------------------------------------
 1 | #include <exception>
 2 | #include <algorithm>
 3 | #include <numeric>
 4 | #include <random>
 5 | #include <cmath>
 6 | #include <mipp.h>
 7 | #include <catch.hpp>
 8 | 
 9 | template <typename T>
10 | void test_reg_high()
11 | {
12 | 	constexpr int n_lanes = mipp::N<T>();
13 | 	T inputs[n_lanes];
14 | 	std::iota(inputs, inputs + n_lanes, (T)0);
15 | 	mipp::reg   r   = mipp::load<T>(inputs);
16 | 	mipp::reg_2 r_2 = mipp::high <T>(r);
17 | 	// The half register returned by mipp::high<T> must mirror the upper half of inputs
18 | 	for (int i = 0; i < n_lanes/2; ++i)
19 | 		REQUIRE(mipp::get<T>(r_2, i) == inputs[mipp::N<T>()/2 +i]);
20 | }
21 | 
22 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // compiled out when MIPP_NO or MIPP_SVE_LS is defined
23 | TEST_CASE("High - mipp::reg", "[mipp::high]")
24 | { // One Catch2 section per element type, each instantiating test_reg_high<T>.
25 | #if defined(MIPP_64BIT)
26 | 	SECTION("datatype = double") { test_reg_high<double>(); }
27 | #endif // MIPP_64BIT
28 | 	SECTION("datatype = float") { test_reg_high<float>(); }
29 | 
30 | #if defined(MIPP_64BIT)
31 | 	SECTION("datatype = int64_t") { test_reg_high<int64_t>(); }
32 | #endif // MIPP_64BIT
33 | 	SECTION("datatype = int32_t") { test_reg_high<int32_t>(); }
34 | #if defined(MIPP_BW) // the 16-bit and 8-bit sections are gated on MIPP_BW
35 | 	SECTION("datatype = int16_t") { test_reg_high<int16_t>(); }
36 | 	SECTION("datatype = int8_t") { test_reg_high<int8_t>(); }
37 | #endif // MIPP_BW
38 | }
39 | #endif // !MIPP_NO && !MIPP_SVE_LS
40 | 
41 | template <typename T>
42 | void test_Reg_high()
43 | {
44 | 	constexpr int n_lanes = mipp::N<T>();
45 | 	T inputs[n_lanes];
46 | 	std::iota(&inputs[0], &inputs[0] + n_lanes, (T)0);
47 | 	mipp::Reg  <T> r   = inputs;
48 | 	mipp::Reg_2<T> r_2 = r.high();
49 | 	// Reg::high() must expose the upper half of inputs, lane by lane
50 | 	for (int i = 0; i < n_lanes/2; ++i)
51 | 		REQUIRE(r_2[i] == inputs[mipp::N<T>()/2 +i]);
52 | }
53 | 
54 | #if !defined(MIPP_SVE_LS) // compiled out only when MIPP_SVE_LS is defined
55 | TEST_CASE("High - mipp::Reg", "[mipp::high]")
56 | { // One Catch2 section per element type, each instantiating test_Reg_high<T>.
57 | #if defined(MIPP_64BIT)
58 | 	SECTION("datatype = double") { test_Reg_high<double>(); }
59 | #endif // MIPP_64BIT
60 | 	SECTION("datatype = float") { test_Reg_high<float>(); }
61 | 
62 | #if defined(MIPP_64BIT)
63 | 	SECTION("datatype = int64_t") { test_Reg_high<int64_t>(); }
64 | #endif // MIPP_64BIT
65 | 	SECTION("datatype = int32_t") { test_Reg_high<int32_t>(); }
66 | #if defined(MIPP_BW) // the 16-bit and 8-bit sections are gated on MIPP_BW
67 | 	SECTION("datatype = int16_t") { test_Reg_high<int16_t>(); }
68 | 	SECTION("datatype = int8_t") { test_Reg_high<int8_t>(); }
69 | #endif // MIPP_BW
70 | }
71 | #endif // !MIPP_SVE_LS
72 | 


--------------------------------------------------------------------------------
/tests/src/memory_operations/low.cpp:
--------------------------------------------------------------------------------
 1 | #include <exception>
 2 | #include <algorithm>
 3 | #include <numeric>
 4 | #include <random>
 5 | #include <cmath>
 6 | #include <mipp.h>
 7 | #include <catch.hpp>
 8 | 
 9 | template <typename T>
10 | void test_reg_low()
11 | {
12 | 	constexpr int n_lanes = mipp::N<T>();
13 | 	T inputs[n_lanes];
14 | 	std::iota(inputs, inputs + n_lanes, (T)0);
15 | 	mipp::reg   r   = mipp::load<T>(inputs);
16 | 	mipp::reg_2 r_2 = mipp::low <T>(r);
17 | 	// The half register returned by mipp::low<T> must mirror the lower half of inputs
18 | 	for (int i = 0; i < n_lanes/2; ++i)
19 | 		REQUIRE(mipp::get<T>(r_2, i) == inputs[i]);
20 | }
21 | 
22 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // compiled out when MIPP_NO or MIPP_SVE_LS is defined
23 | TEST_CASE("Low - mipp::reg", "[mipp::low]")
24 | { // One Catch2 section per element type, each instantiating test_reg_low<T>.
25 | #if defined(MIPP_64BIT)
26 | 	SECTION("datatype = double") { test_reg_low<double>(); }
27 | #endif // MIPP_64BIT
28 | 	SECTION("datatype = float") { test_reg_low<float>(); }
29 | 
30 | #if defined(MIPP_64BIT)
31 | 	SECTION("datatype = int64_t") { test_reg_low<int64_t>(); }
32 | #endif // MIPP_64BIT
33 | 	SECTION("datatype = int32_t") { test_reg_low<int32_t>(); }
34 | #if defined(MIPP_BW) // the 16-bit and 8-bit sections are gated on MIPP_BW
35 | 	SECTION("datatype = int16_t") { test_reg_low<int16_t>(); }
36 | 	SECTION("datatype = int8_t") { test_reg_low<int8_t>(); }
37 | #endif // MIPP_BW
38 | }
39 | #endif // !MIPP_NO && !MIPP_SVE_LS
40 | 
41 | template <typename T>
42 | void test_Reg_low()
43 | {
44 | 	constexpr int n_lanes = mipp::N<T>();
45 | 	T inputs[n_lanes];
46 | 	std::iota(&inputs[0], &inputs[0] + n_lanes, (T)0);
47 | 	mipp::Reg  <T> r   = inputs;
48 | 	mipp::Reg_2<T> r_2 = r.low();
49 | 	// Reg::low() must expose the lower half of inputs, lane by lane
50 | 	for (int i = 0; i < n_lanes/2; ++i)
51 | 		REQUIRE(r_2[i] == inputs[i]);
52 | }
53 | 
54 | #if !defined(MIPP_SVE_LS) // compiled out only when MIPP_SVE_LS is defined
55 | TEST_CASE("Low - mipp::Reg", "[mipp::low]")
56 | { // One Catch2 section per element type, each instantiating test_Reg_low<T>.
57 | #if defined(MIPP_64BIT)
58 | 	SECTION("datatype = double") { test_Reg_low<double>(); }
59 | #endif // MIPP_64BIT
60 | 	SECTION("datatype = float") { test_Reg_low<float>(); }
61 | 
62 | #if defined(MIPP_64BIT)
63 | 	SECTION("datatype = int64_t") { test_Reg_low<int64_t>(); }
64 | #endif // MIPP_64BIT
65 | 	SECTION("datatype = int32_t") { test_Reg_low<int32_t>(); }
66 | #if defined(MIPP_BW) // the 16-bit and 8-bit sections are gated on MIPP_BW
67 | 	SECTION("datatype = int16_t") { test_Reg_low<int16_t>(); }
68 | 	SECTION("datatype = int8_t") { test_Reg_low<int8_t>(); }
69 | #endif // MIPP_BW
70 | }
71 | #endif // !MIPP_SVE_LS
72 | 


--------------------------------------------------------------------------------
/tests/src/memory_operations/lrot.cpp:
--------------------------------------------------------------------------------
 1 | #include <exception>
 2 | #include <algorithm>
 3 | #include <numeric>
 4 | #include <random>
 5 | #include <cmath>
 6 | #include <mipp.h>
 7 | #include <catch.hpp>
 8 | 
 9 | template <typename T>
10 | void test_reg_lrot()
11 | {
12 | 	// Raw-register left rotation: lane i must receive inputs1[i+1], and the last lane wraps around to inputs1[0].
13 | 	constexpr int N = mipp::N<T>();
14 | 	T inputs1[N];
15 | 	std::iota(&inputs1[0], &inputs1[0] + N, (T)0);
16 | 	mipp::reg r1 = mipp::load<T>(inputs1);
17 | 	mipp::reg r2 = mipp::lrot<T>(r1);
18 | 
19 | 	for (int i = 0; i < N; ++i)
20 | 		REQUIRE(mipp::get<T>(r2, i) == (i == N-1 ? inputs1[0] : inputs1[i+1]));
21 | }
22 | 
23 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // compiled out when MIPP_NO or MIPP_SVE_LS is defined
24 | TEST_CASE("Left rotation (cyclic permutation) - mipp::reg", "[mipp::lrot]")
25 | { // One Catch2 section per element type, each instantiating test_reg_lrot<T>.
26 | #if defined(MIPP_64BIT)
27 | 	SECTION("datatype = double") { test_reg_lrot<double>(); }
28 | #endif // MIPP_64BIT
29 | 	SECTION("datatype = float") { test_reg_lrot<float>(); }
30 | 
31 | #if defined(MIPP_64BIT)
32 | 	SECTION("datatype = int64_t") { test_reg_lrot<int64_t>(); }
33 | #endif // MIPP_64BIT
34 | 	SECTION("datatype = int32_t") { test_reg_lrot<int32_t>(); }
35 | #if defined(MIPP_BW)
36 | #if (!defined(MIPP_SSE) && !defined(MIPP_AVX512)) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) || (defined(MIPP_AVX512) && defined(MIPP_AVX512BW)) // SSE needs version >= 31, AVX-512 needs MIPP_AVX512BW
37 | 	SECTION("datatype = int16_t") { test_reg_lrot<int16_t>(); }
38 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI)) // nested: int8_t also needs the int16_t guard above
39 | 	SECTION("datatype = int8_t") { test_reg_lrot<int8_t>(); }
40 | #endif // int8_t guard (AVX-512 requires MIPP_AVX512VBMI)
41 | #endif // int16_t guard
42 | #endif // MIPP_BW
43 | }
44 | #endif // !MIPP_NO && !MIPP_SVE_LS
45 | 
46 | template <typename T>
47 | void test_Reg_lrot()
48 | {
49 | 	// Object-API left rotation: lane i must receive inputs1[i+1], and the last lane wraps around to inputs1[0].
50 | 	constexpr int N = mipp::N<T>();
51 | 	T inputs1[N];
52 | 	std::iota(&inputs1[0], &inputs1[0] + N, (T)0);
53 | 	mipp::Reg<T> r1 = inputs1;
54 | 	mipp::Reg<T> r2 = mipp::lrot(r1);
55 | 
56 | 	for (int i = 0; i < N; ++i)
57 | 		REQUIRE(r2[i] == (i == N-1 ? inputs1[0] : inputs1[i+1]));
58 | }
59 | 
60 | #if !defined(MIPP_SVE_LS) // compiled out only when MIPP_SVE_LS is defined
61 | TEST_CASE("Left rotation (cyclic permutation) - mipp::Reg", "[mipp::lrot]")
62 | { // One Catch2 section per element type, each instantiating test_Reg_lrot<T>.
63 | #if defined(MIPP_64BIT)
64 | 	SECTION("datatype = double") { test_Reg_lrot<double>(); }
65 | #endif // MIPP_64BIT
66 | 	SECTION("datatype = float") { test_Reg_lrot<float>(); }
67 | 
68 | #if defined(MIPP_64BIT)
69 | 	SECTION("datatype = int64_t") { test_Reg_lrot<int64_t>(); }
70 | #endif // MIPP_64BIT
71 | 	SECTION("datatype = int32_t") { test_Reg_lrot<int32_t>(); }
72 | #if defined(MIPP_BW)
73 | #if (!defined(MIPP_SSE) && !defined(MIPP_AVX512)) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) || (defined(MIPP_AVX512) && defined(MIPP_AVX512BW)) // SSE needs version >= 31, AVX-512 needs MIPP_AVX512BW
74 | 	SECTION("datatype = int16_t") { test_Reg_lrot<int16_t>(); }
75 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI)) // nested: int8_t also needs the int16_t guard above
76 | 	SECTION("datatype = int8_t") { test_Reg_lrot<int8_t>(); }
77 | #endif // int8_t guard (AVX-512 requires MIPP_AVX512VBMI)
78 | #endif // int16_t guard
79 | #endif // MIPP_BW
80 | }
81 | #endif // !MIPP_SVE_LS
82 | 


--------------------------------------------------------------------------------
/tests/src/memory_operations/rrot.cpp:
--------------------------------------------------------------------------------
 1 | #include <exception>
 2 | #include <algorithm>
 3 | #include <numeric>
 4 | #include <random>
 5 | #include <cmath>
 6 | #include <mipp.h>
 7 | #include <catch.hpp>
 8 | 
 9 | template <typename T>
10 | void test_reg_rrot()
11 | {
12 | 	// Raw-register right rotation: lane i must receive inputs1[i-1], and lane 0 wraps around to inputs1[N-1].
13 | 	constexpr int N = mipp::N<T>();
14 | 	T inputs1[N];
15 | 	std::iota(&inputs1[0], &inputs1[0] + N, (T)0);
16 | 	mipp::reg r1 = mipp::load<T>(inputs1);
17 | 	mipp::reg r2 = mipp::rrot<T>(r1);
18 | 
19 | 	for (int i = 0; i < N; ++i)
20 | 		REQUIRE(mipp::get<T>(r2, i) == (i == 0 ? inputs1[N-1] : inputs1[i-1]));
21 | }
22 | 
23 | #if !defined(MIPP_NO) && !defined(MIPP_SVE_LS) // compiled out when MIPP_NO or MIPP_SVE_LS is defined
24 | TEST_CASE("Right rotation (cyclic permutation) - mipp::reg", "[mipp::rrot]")
25 | { // One Catch2 section per element type, each instantiating test_reg_rrot<T>.
26 | #if defined(MIPP_64BIT)
27 | 	SECTION("datatype = double") { test_reg_rrot<double>(); }
28 | #endif // MIPP_64BIT
29 | 	SECTION("datatype = float") { test_reg_rrot<float>(); }
30 | 
31 | #if defined(MIPP_64BIT)
32 | 	SECTION("datatype = int64_t") { test_reg_rrot<int64_t>(); }
33 | #endif // MIPP_64BIT
34 | 	SECTION("datatype = int32_t") { test_reg_rrot<int32_t>(); }
35 | #if defined(MIPP_BW)
36 | #if (!defined(MIPP_SSE) && !defined(MIPP_AVX512)) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) || (defined(MIPP_AVX512) && defined(MIPP_AVX512BW)) // SSE needs version >= 31, AVX-512 needs MIPP_AVX512BW
37 | 	SECTION("datatype = int16_t") { test_reg_rrot<int16_t>(); }
38 | #if !defined(MIPP_AVX512) || (defined(MIPP_AVX512) && defined(MIPP_AVX512VBMI)) // nested: int8_t also needs the int16_t guard above
39 | 	SECTION("datatype = int8_t") { test_reg_rrot<int8_t>(); }
40 | #endif // int8_t guard (AVX-512 requires MIPP_AVX512VBMI)
41 | #endif // int16_t guard
42 | #endif // MIPP_BW
43 | }
44 | #endif // !MIPP_NO && !MIPP_SVE_LS
45 | 
46 | // Checks mipp::rrot on the mipp::Reg<T> wrapper: lane i must receive lane i-1 (lane 0 wraps).
47 | template <typename T>
48 | void test_Reg_rrot()
49 | {
50 | 	constexpr int N = mipp::N<T>();
51 | 	T ramp[N];
52 | 	std::iota(ramp, ramp + N, (T)0); // lanes hold 0, 1, ..., N-1
53 | 	mipp::Reg<T> r1 = ramp;
54 | 	mipp::Reg<T> r2 = mipp::rrot(r1);
55 | 	// (lane + N - 1) % N is the left neighbour of `lane`, wrapping to N-1 for lane 0.
56 | 	for (int lane = 0; lane < N; ++lane)
57 | 		REQUIRE(r2[lane] == ramp[(lane + N - 1) % N]);
58 | }
59 | 
60 | #ifndef MIPP_SVE_LS
61 | TEST_CASE("Right rotation (cyclic permutation) - mipp::Reg", "[mipp::rrot]")
62 | {
63 | #ifdef MIPP_64BIT
64 | 	SECTION("datatype = double") { test_Reg_rrot<double>(); }
65 | #endif // MIPP_64BIT
66 | 	SECTION("datatype = float") { test_Reg_rrot<float>(); }
67 | 	// Integer lanes: the 16-bit and 8-bit sections are gated on the instruction set in use.
68 | #ifdef MIPP_64BIT
69 | 	SECTION("datatype = int64_t") { test_Reg_rrot<int64_t>(); }
70 | #endif // MIPP_64BIT
71 | 	SECTION("datatype = int32_t") { test_Reg_rrot<int32_t>(); }
72 | #ifdef MIPP_BW
73 | #if !(defined(MIPP_SSE) || defined(MIPP_AVX512)) || (defined(MIPP_SSE) && MIPP_INSTR_VERSION >= 31) || (defined(MIPP_AVX512) && defined(MIPP_AVX512BW))
74 | 	SECTION("datatype = int16_t") { test_Reg_rrot<int16_t>(); }
75 | #if !defined(MIPP_AVX512) || defined(MIPP_AVX512VBMI)
76 | 	SECTION("datatype = int8_t") { test_Reg_rrot<int8_t>(); }
77 | #endif // int8_t: with AVX-512, only when MIPP_AVX512VBMI is defined
78 | #endif // int16_t: SSE needs MIPP_INSTR_VERSION >= 31, AVX-512 needs MIPP_AVX512BW
79 | #endif // MIPP_BW
80 | }
81 | #endif // !MIPP_SVE_LS
82 | 


--------------------------------------------------------------------------------
/tests/src/reductions/hadd.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | // Compares mipp::hadd on a raw mipp::reg against a 64-bit scalar sum clamped to the range of T.
 10 | template <typename T>
 11 | void test_reg_hadd_int()
 12 | {
 13 | 	constexpr int N = mipp::N<T>();
 14 | 	T inputs1[N];
 15 | 	std::mt19937 g; // default-seeded, so every run draws the same sequence
 16 | 	// uniform_int_distribution is only specified for short/int/long/long long (and unsigned forms),
 17 | 	// not for 8-bit T: draw ints in [-10, 10] (representable by every tested T) and narrow them.
 18 | 	std::uniform_int_distribution<int> dis(-10, 10);
 19 | 
 20 | 	for (auto j = 0; j < 100; j++)
 21 | 	{
 22 | 		for (auto i = 0; i < N; i++)
 23 | 			inputs1[i] = (T)dis(g);
 24 | 
 25 | 		mipp::reg r1 = mipp::loadu<T>(inputs1);
 26 | 		auto val = mipp::hadd<T>(r1);
 27 | 		// Reference: exact sum in 64 bits, then clamped to [min(T), max(T)].
 28 | 		int64_t acc = std::accumulate(inputs1, inputs1 + N, (int64_t)0);
 29 | 		acc = std::min(acc, (int64_t)std::numeric_limits<T>::max());
 30 | 		T res = (T)std::max(acc, (int64_t)std::numeric_limits<T>::min());
 31 | 
 32 | 		REQUIRE(res == val);
 33 | 	}
 34 | }
 35 | 
 36 | // Compares mipp::hadd on a raw mipp::reg with a lane-order scalar sum, up to Approx tolerance.
 37 | template <typename T>
 38 | void test_reg_hadd_real()
 39 | {
 40 | 	constexpr int N = mipp::N<T>();
 41 | 	T lanes[N];
 42 | 	std::mt19937 rng; // default-seeded: the drawn sequence is the same on every run
 43 | 	std::uniform_real_distribution<T> dis(-100, 100);
 44 | 
 45 | 	for (int round = 0; round < 100; ++round)
 46 | 	{
 47 | 		// Draw a fresh set of lanes for this round.
 48 | 		std::generate(lanes, lanes + N, [&]() { return dis(rng); });
 49 | 
 50 | 		mipp::reg packed = mipp::loadu<T>(lanes);
 51 | 		const auto val = mipp::hadd<T>(packed);
 52 | 
 53 | 		// Scalar reference, accumulated in T starting from lane 0.
 54 | 		const T res = std::accumulate(lanes + 1, lanes + N, lanes[0]);
 55 | 
 56 | 		// Approx tolerates rounding differences (the vector reduction may add in another order).
 57 | 		REQUIRE(res == Approx(val));
 58 | 	}
 59 | }
 60 | 
 61 | #if !defined(MIPP_NO)
 62 | TEST_CASE("Horizontal addition - mipp::reg", "[mipp::hadd]")
 63 | {
 64 | #ifdef MIPP_64BIT
 65 | 	SECTION("datatype = double") { test_reg_hadd_real<double>(); }
 66 | #endif // MIPP_64BIT
 67 | 	SECTION("datatype = float") { test_reg_hadd_real<float>(); }
 68 | 	// Integer sections are gated per lane width on the instruction set in use.
 69 | #ifndef MIPP_SVE_LS
 70 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
 71 | #ifdef MIPP_64BIT
 72 | 	SECTION("datatype = int64_t") { test_reg_hadd_int<int64_t>(); }
 73 | #endif // MIPP_64BIT
 74 | 	SECTION("datatype = int32_t") { test_reg_hadd_int<int32_t>(); }
 75 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
 76 | #ifdef MIPP_BW
 77 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 31
 78 | 	SECTION("datatype = int16_t") { test_reg_hadd_int<int16_t>(); }
 79 | #if !defined(_MSC_VER)
 80 | 	SECTION("datatype = int8_t") { test_reg_hadd_int<int8_t>(); }
 81 | #endif // !_MSC_VER
 82 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 31
 83 | #endif // MIPP_BW
 84 | #endif // !MIPP_SVE_LS
 85 | }
 86 | #endif // !MIPP_NO
 87 | 
 88 | // Compares mipp::hadd on a mipp::Reg<T> against a 64-bit scalar sum clamped to the range of T.
 89 | template <typename T>
 90 | void test_Reg_hadd_int()
 91 | {
 92 | 	constexpr int N = mipp::N<T>();
 93 | 	T inputs1[N];
 94 | 	std::mt19937 g; // default-seeded, so every run draws the same sequence
 95 | 	// uniform_int_distribution is only specified for short/int/long/long long (and unsigned forms),
 96 | 	// not for 8-bit T: draw ints in [-10, 10] (representable by every tested T) and narrow them.
 97 | 	std::uniform_int_distribution<int> dis(-10, 10);
 98 | 
 99 | 	for (auto j = 0; j < 100; j++)
100 | 	{
101 | 		for (auto i = 0; i < N; i++)
102 | 			inputs1[i] = (T)dis(g);
103 | 
104 | 		mipp::Reg<T> r1 = inputs1;
105 | 		auto val = mipp::hadd(r1);
106 | 		// Reference: exact sum in 64 bits, then clamped to [min(T), max(T)].
107 | 		int64_t acc = std::accumulate(inputs1, inputs1 + N, (int64_t)0);
108 | 		acc = std::min(acc, (int64_t)std::numeric_limits<T>::max());
109 | 		T res = (T)std::max(acc, (int64_t)std::numeric_limits<T>::min());
110 | 
111 | 		REQUIRE(res == val);
112 | 	}
113 | }
114 | 
115 | // Compares mipp::hadd on a mipp::Reg<T> with a lane-order scalar sum, up to Approx tolerance.
116 | template <typename T>
117 | void test_Reg_hadd_real()
118 | {
119 | 	constexpr int N = mipp::N<T>();
120 | 	T lanes[N];
121 | 	std::mt19937 rng; // default-seeded: the drawn sequence is the same on every run
122 | 	std::uniform_real_distribution<T> dis(-100, 100);
123 | 
124 | 	for (int round = 0; round < 100; ++round)
125 | 	{
126 | 		// Draw a fresh set of lanes for this round.
127 | 		std::generate(lanes, lanes + N, [&]() { return dis(rng); });
128 | 
129 | 		mipp::Reg<T> packed = lanes;
130 | 		const auto val = mipp::hadd(packed);
131 | 
132 | 		// Scalar reference, accumulated in T starting from lane 0.
133 | 		const T res = std::accumulate(lanes + 1, lanes + N, lanes[0]);
134 | 
135 | 		// Approx tolerates rounding differences (the vector reduction may add in another order).
136 | 		REQUIRE(res == Approx(val));
137 | 	}
138 | }
139 | 
140 | TEST_CASE("Horizontal addition - mipp::Reg", "[mipp::hadd]")
141 | {
142 | #ifdef MIPP_64BIT
143 | 	SECTION("datatype = double") { test_Reg_hadd_real<double>(); }
144 | #endif // MIPP_64BIT
145 | 	SECTION("datatype = float") { test_Reg_hadd_real<float>(); }
146 | 	// Integer sections are gated per lane width on the instruction set in use.
147 | #ifndef MIPP_SVE_LS
148 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
149 | #ifdef MIPP_64BIT
150 | 	SECTION("datatype = int64_t") { test_Reg_hadd_int<int64_t>(); }
151 | #endif // MIPP_64BIT
152 | 	SECTION("datatype = int32_t") { test_Reg_hadd_int<int32_t>(); }
153 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
154 | #ifdef MIPP_BW
155 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 31
156 | 	SECTION("datatype = int16_t") { test_Reg_hadd_int<int16_t>(); }
157 | #if !defined(_MSC_VER)
158 | 	SECTION("datatype = int8_t") { test_Reg_hadd_int<int8_t>(); }
159 | #endif // !_MSC_VER
160 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 31
161 | #endif // MIPP_BW
162 | #endif // !MIPP_SVE_LS
163 | }


--------------------------------------------------------------------------------
/tests/src/reductions/hmax.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | // Compares mipp::hmax on a raw mipp::reg with the scalar maximum of the same random lanes.
 10 | template <typename T>
 11 | void test_reg_hmax_int()
 12 | {
 13 | 	constexpr int N = mipp::N<T>();
 14 | 	T inputs1[N];
 15 | 	std::mt19937 g; // default-seeded, so every run draws the same sequence
 16 | 	// uniform_int_distribution is only specified for short/int/long/long long (and unsigned forms),
 17 | 	// not for 8-bit T: draw long long over the full range of the signed T and narrow afterwards.
 18 | 	std::uniform_int_distribution<long long> dis((long long)std::numeric_limits<T>::min(),
 19 | 	                                             (long long)std::numeric_limits<T>::max());
 20 | 	for (auto j = 0; j < 100; j++)
 21 | 	{
 22 | 		for (auto i = 0; i < N; i++)
 23 | 			inputs1[i] = (T)dis(g);
 24 | 
 25 | 		mipp::reg r1 = mipp::loadu<T>(inputs1);
 26 | 		auto val = mipp::hmax<T>(r1);
 27 | 		// Scalar reference: largest lane.
 28 | 		T res = *std::max_element(inputs1, inputs1 + N);
 29 | 		REQUIRE(res == val);
 30 | 	}
 31 | }
 32 | 
 33 | // Compares mipp::hmax on a raw mipp::reg with the scalar maximum of the same random lanes.
 34 | template <typename T>
 35 | void test_reg_hmax_real()
 36 | {
 37 | 	constexpr int N = mipp::N<T>();
 38 | 	T lanes[N];
 39 | 	std::mt19937 rng; // default-seeded: the drawn sequence is the same on every run
 40 | 	// numeric_limits<T>::min() is the smallest positive normalized value: only positive lanes are drawn.
 41 | 	std::uniform_real_distribution<T> dis(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
 42 | 
 43 | 	for (int round = 0; round < 100; ++round)
 44 | 	{
 45 | 		std::generate(lanes, lanes + N, [&]() { return dis(rng); });
 46 | 
 47 | 		mipp::reg packed = mipp::loadu<T>(lanes);
 48 | 		const auto val = mipp::hmax<T>(packed);
 49 | 
 50 | 		// Scalar reference: largest lane.
 51 | 		const T res = *std::max_element(lanes, lanes + N);
 52 | 
 53 | 		REQUIRE(res == val);
 54 | 	}
 55 | }
 56 | 
 57 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS))
 58 | TEST_CASE("Horizontal maximum - mipp::reg", "[mipp::hmax]")
 59 | {
 60 | #ifdef MIPP_64BIT
 61 | 	SECTION("datatype = double") { test_reg_hmax_real<double>(); }
 62 | #endif // MIPP_64BIT
 63 | 	SECTION("datatype = float") { test_reg_hmax_real<float>(); }
 64 | 	// Integer sections are gated per lane width on the instruction set in use.
 65 | #ifdef MIPP_64BIT
 66 | #if !(defined(MIPP_SSE) || defined(MIPP_AVX) || defined(MIPP_NEON))
 67 | 	SECTION("datatype = int64_t") { test_reg_hmax_int<int64_t>(); }
 68 | #endif // int64_t: skipped for SSE, AVX and NEON
 69 | #endif // MIPP_64BIT
 70 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
 71 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
 72 | 	SECTION("datatype = int32_t") { test_reg_hmax_int<int32_t>(); }
 73 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
 74 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
 75 | #ifdef MIPP_BW
 76 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 31
 77 | 	SECTION("datatype = int16_t") { test_reg_hmax_int<int16_t>(); }
 78 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 31
 79 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
 80 | #if !defined(_MSC_VER)
 81 | 	SECTION("datatype = int8_t") { test_reg_hmax_int<int8_t>(); }
 82 | #endif // !_MSC_VER
 83 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
 84 | #endif // MIPP_BW
 85 | }
 86 | #endif // !MIPP_NO && !MIPP_SVE_LS
 87 | 
 88 | // Compares mipp::hmax on a mipp::Reg<T> with the scalar maximum of the same random lanes.
 89 | template <typename T>
 90 | void test_Reg_hmax_int()
 91 | {
 92 | 	constexpr int N = mipp::N<T>();
 93 | 	T inputs1[N];
 94 | 	std::mt19937 g; // default-seeded, so every run draws the same sequence
 95 | 	// uniform_int_distribution is only specified for short/int/long/long long (and unsigned forms),
 96 | 	// not for 8-bit T: draw long long over the full range of the signed T and narrow afterwards.
 97 | 	std::uniform_int_distribution<long long> dis((long long)std::numeric_limits<T>::min(),
 98 | 	                                             (long long)std::numeric_limits<T>::max());
 99 | 	for (auto j = 0; j < 100; j++)
100 | 	{
101 | 		for (auto i = 0; i < N; i++)
102 | 			inputs1[i] = (T)dis(g);
103 | 
104 | 		mipp::Reg<T> r1 = inputs1;
105 | 		auto val = mipp::hmax(r1);
106 | 		// Scalar reference: largest lane.
107 | 		T res = *std::max_element(inputs1, inputs1 + N);
108 | 		REQUIRE(res == val);
109 | 	}
110 | }
111 | 
112 | // Compares mipp::hmax on a mipp::Reg<T> with the scalar maximum of the same random lanes.
113 | template <typename T>
114 | void test_Reg_hmax_real()
115 | {
116 | 	constexpr int N = mipp::N<T>();
117 | 	T lanes[N];
118 | 	std::mt19937 rng; // default-seeded: the drawn sequence is the same on every run
119 | 	// numeric_limits<T>::min() is the smallest positive normalized value: only positive lanes are drawn.
120 | 	std::uniform_real_distribution<T> dis(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
121 | 
122 | 	for (int round = 0; round < 100; ++round)
123 | 	{
124 | 		std::generate(lanes, lanes + N, [&]() { return dis(rng); });
125 | 
126 | 		mipp::Reg<T> packed = lanes;
127 | 		const auto val = mipp::hmax(packed);
128 | 
129 | 		// Scalar reference: largest lane.
130 | 		const T res = *std::max_element(lanes, lanes + N);
131 | 
132 | 		REQUIRE(res == val);
133 | 	}
134 | }
135 | 
136 | #ifndef MIPP_SVE_LS
137 | TEST_CASE("Horizontal maximum - mipp::Reg", "[mipp::hmax]")
138 | {
139 | #ifdef MIPP_64BIT
140 | 	SECTION("datatype = double") { test_Reg_hmax_real<double>(); }
141 | #endif // MIPP_64BIT
142 | 	SECTION("datatype = float") { test_Reg_hmax_real<float>(); }
143 | 	// Integer sections are gated per lane width on the instruction set in use.
144 | #ifdef MIPP_64BIT
145 | #if !(defined(MIPP_SSE) || defined(MIPP_AVX) || defined(MIPP_NEON))
146 | 	SECTION("datatype = int64_t") { test_Reg_hmax_int<int64_t>(); }
147 | #endif // int64_t: skipped for SSE, AVX and NEON
148 | #endif // MIPP_64BIT
149 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
150 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
151 | 	SECTION("datatype = int32_t") { test_Reg_hmax_int<int32_t>(); }
152 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
153 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
154 | #ifdef MIPP_BW
155 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 31
156 | 	SECTION("datatype = int16_t") { test_Reg_hmax_int<int16_t>(); }
157 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 31
158 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
159 | #if !defined(_MSC_VER)
160 | 	SECTION("datatype = int8_t") { test_Reg_hmax_int<int8_t>(); }
161 | #endif // !_MSC_VER
162 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
163 | #endif // MIPP_BW
164 | }
165 | #endif // !MIPP_SVE_LS
166 | 


--------------------------------------------------------------------------------
/tests/src/reductions/hmin.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | // Compares mipp::hmin on a raw mipp::reg with the scalar minimum of the same random lanes.
 10 | template <typename T>
 11 | void test_reg_hmin_int()
 12 | {
 13 | 	constexpr int N = mipp::N<T>();
 14 | 	T inputs1[N];
 15 | 	std::mt19937 g; // default-seeded, so every run draws the same sequence
 16 | 	// uniform_int_distribution is only specified for short/int/long/long long (and unsigned forms),
 17 | 	// not for 8-bit T: draw long long over the full range of the signed T and narrow afterwards.
 18 | 	std::uniform_int_distribution<long long> dis((long long)std::numeric_limits<T>::min(),
 19 | 	                                             (long long)std::numeric_limits<T>::max());
 20 | 	for (auto j = 0; j < 100; j++)
 21 | 	{
 22 | 		for (auto i = 0; i < N; i++)
 23 | 			inputs1[i] = (T)dis(g);
 24 | 
 25 | 		mipp::reg r1 = mipp::loadu<T>(inputs1);
 26 | 		auto val = mipp::hmin<T>(r1);
 27 | 		// Scalar reference: smallest lane.
 28 | 		T res = *std::min_element(inputs1, inputs1 + N);
 29 | 		REQUIRE(res == val);
 30 | 	}
 31 | }
 32 | 
 33 | // Compares mipp::hmin on a raw mipp::reg with the scalar minimum of the same random lanes.
 34 | template <typename T>
 35 | void test_reg_hmin_real()
 36 | {
 37 | 	constexpr int N = mipp::N<T>();
 38 | 	T lanes[N];
 39 | 	std::mt19937 rng; // default-seeded: the drawn sequence is the same on every run
 40 | 	// numeric_limits<T>::min() is the smallest positive normalized value, not the most negative
 41 | 	// one: only positive lanes are drawn.
 42 | 	std::uniform_real_distribution<T> dis(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
 43 | 
 44 | 	for (int round = 0; round < 100; ++round)
 45 | 	{
 46 | 		std::generate(lanes, lanes + N, [&]() { return dis(rng); });
 47 | 
 48 | 		mipp::reg packed = mipp::loadu<T>(lanes);
 49 | 		const auto val = mipp::hmin<T>(packed);
 50 | 
 51 | 		// Scalar reference: smallest lane.
 52 | 		const T res = *std::min_element(lanes, lanes + N);
 53 | 
 54 | 		REQUIRE(res == val);
 55 | 	}
 56 | }
 57 | 
 58 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS))
 59 | TEST_CASE("Horizontal minimum - mipp::reg", "[mipp::hmin]")
 60 | {
 61 | #ifdef MIPP_64BIT
 62 | 	SECTION("datatype = double") { test_reg_hmin_real<double>(); }
 63 | #endif // MIPP_64BIT
 64 | 	SECTION("datatype = float") { test_reg_hmin_real<float>(); }
 65 | 	// Integer sections are gated per lane width on the instruction set in use.
 66 | #ifdef MIPP_64BIT
 67 | #if !(defined(MIPP_SSE) || defined(MIPP_AVX) || defined(MIPP_NEON))
 68 | 	SECTION("datatype = int64_t") { test_reg_hmin_int<int64_t>(); }
 69 | #endif // int64_t: skipped for SSE, AVX and NEON
 70 | #endif // MIPP_64BIT
 71 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
 72 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
 73 | 	SECTION("datatype = int32_t") { test_reg_hmin_int<int32_t>(); }
 74 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
 75 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
 76 | #ifdef MIPP_BW
 77 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 31
 78 | 	SECTION("datatype = int16_t") { test_reg_hmin_int<int16_t>(); }
 79 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 31
 80 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
 81 | #if !defined(_MSC_VER)
 82 | 	SECTION("datatype = int8_t") { test_reg_hmin_int<int8_t>(); }
 83 | #endif // !_MSC_VER
 84 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
 85 | #endif // MIPP_BW
 86 | }
 87 | #endif // !MIPP_NO && !MIPP_SVE_LS
 88 | 
 89 | // Compares mipp::hmin on a mipp::Reg<T> with the scalar minimum of the same random lanes.
 90 | template <typename T>
 91 | void test_Reg_hmin_int()
 92 | {
 93 | 	constexpr int N = mipp::N<T>();
 94 | 	T inputs1[N];
 95 | 	std::mt19937 g; // default-seeded, so every run draws the same sequence
 96 | 	// uniform_int_distribution is only specified for short/int/long/long long (and unsigned forms),
 97 | 	// not for 8-bit T: draw long long over the full range of the signed T and narrow afterwards.
 98 | 	std::uniform_int_distribution<long long> dis((long long)std::numeric_limits<T>::min(),
 99 | 	                                             (long long)std::numeric_limits<T>::max());
100 | 	for (auto j = 0; j < 100; j++)
101 | 	{
102 | 		for (auto i = 0; i < N; i++)
103 | 			inputs1[i] = (T)dis(g);
104 | 
105 | 		mipp::Reg<T> r1 = inputs1;
106 | 		auto val = mipp::hmin(r1);
107 | 		// Scalar reference: smallest lane.
108 | 		T res = *std::min_element(inputs1, inputs1 + N);
109 | 		REQUIRE(res == val);
110 | 	}
111 | }
112 | 
113 | // Compares mipp::hmin on a mipp::Reg<T> with the scalar minimum of the same random lanes.
114 | template <typename T>
115 | void test_Reg_hmin_real()
116 | {
117 | 	constexpr int N = mipp::N<T>();
118 | 	T inputs1[N];
119 | 	std::mt19937 g; // default-seeded, so every run draws the same sequence
120 | 	// numeric_limits<T>::min() is the smallest positive normalized value: only positive lanes are drawn.
121 | 	std::uniform_real_distribution<T> dis(std::numeric_limits<T>::min(), std::numeric_limits<T>::max());
122 | 
123 | 	// The round counter is named j (as in the sibling tests) so the lane loops no longer shadow it.
124 | 	for (auto j = 0; j < 100; j++)
125 | 	{
126 | 		for (auto i = 0; i < N; i++)
127 | 			inputs1[i] = dis(g);
128 | 
129 | 		mipp::Reg<T> r1 = inputs1;
130 | 		auto val = mipp::hmin(r1);
131 | 
132 | 		auto res = *std::min_element(inputs1, inputs1 + N); // scalar reference: smallest lane
133 | 		REQUIRE(res == val);
134 | 	}
135 | }
136 | 
137 | #ifndef MIPP_SVE_LS
138 | TEST_CASE("Horizontal minimum - mipp::Reg", "[mipp::hmin]")
139 | {
140 | #ifdef MIPP_64BIT
141 | 	SECTION("datatype = double") { test_Reg_hmin_real<double>(); }
142 | #endif // MIPP_64BIT
143 | 	SECTION("datatype = float") { test_Reg_hmin_real<float>(); }
144 | 	// Integer sections are gated per lane width on the instruction set in use.
145 | #ifdef MIPP_64BIT
146 | #if !(defined(MIPP_SSE) || defined(MIPP_AVX) || defined(MIPP_NEON))
147 | 	SECTION("datatype = int64_t") { test_Reg_hmin_int<int64_t>(); }
148 | #endif // int64_t: skipped for SSE, AVX and NEON
149 | #endif // MIPP_64BIT
150 | #if !defined(MIPP_AVX) || MIPP_INSTR_VERSION >= 2
151 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
152 | 	SECTION("datatype = int32_t") { test_Reg_hmin_int<int32_t>(); }
153 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
154 | #endif // !MIPP_AVX || MIPP_INSTR_VERSION >= 2
155 | #ifdef MIPP_BW
156 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 31
157 | 	SECTION("datatype = int16_t") { test_Reg_hmin_int<int16_t>(); }
158 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 31
159 | #if !defined(MIPP_SSE) || MIPP_INSTR_VERSION >= 41
160 | #if !defined(_MSC_VER)
161 | 	SECTION("datatype = int8_t") { test_Reg_hmin_int<int8_t>(); }
162 | #endif // !_MSC_VER
163 | #endif // !MIPP_SSE || MIPP_INSTR_VERSION >= 41
164 | #endif // MIPP_BW
165 | }
166 | #endif // !MIPP_SVE_LS
167 | 


--------------------------------------------------------------------------------
/tests/src/reductions/hmul.cpp:
--------------------------------------------------------------------------------
  1 | #include <exception>
  2 | #include <algorithm>
  3 | #include <numeric>
  4 | #include <random>
  5 | #include <cmath>
  6 | #include <mipp.h>
  7 | #include <catch.hpp>
  8 | 
  9 | // Compares mipp::hmul on a raw mipp::reg with the scalar product of the same random lanes.
 10 | template <typename T>
 11 | void test_reg_hmul_int()
 12 | {
 13 | 	constexpr int N = mipp::N<T>();
 14 | 	T inputs1[N];
 15 | 	std::mt19937 g; // default-seeded, so every run draws the same sequence
 16 | 	// uniform_int_distribution is only specified for short/int/long/long long (and unsigned forms),
 17 | 	// not for 8-bit T: draw ints in [-10, 10] (representable by every integer T) and narrow them.
 18 | 	std::uniform_int_distribution<int> dis(-10, 10);
 19 | 	for (auto j = 0; j < 100; j++)
 20 | 	{
 21 | 		for (auto i = 0; i < N; i++)
 22 | 			inputs1[i] = (T)dis(g);
 23 | 
 24 | 		mipp::reg r1 = mipp::loadu<T>(inputs1);
 25 | 		auto val = mipp::hmul<T>(r1);
 26 | 		auto res = inputs1[0]; // NOTE(review): with many lanes the product can leave the range of T
 27 | 		for (auto i = 1; i < N; i++)
 28 | 			res *= inputs1[i];
 29 | 		REQUIRE(res == val);
 30 | 	}
 31 | }
 32 | 
 33 | // Compares mipp::hmul on a raw mipp::reg with a lane-order scalar product, up to Approx tolerance.
 34 | template <typename T>
 35 | void test_reg_hmul_real()
 36 | {
 37 | 	constexpr int N = mipp::N<T>();
 38 | 	T lanes[N];
 39 | 	std::mt19937 rng; // default-seeded: the drawn sequence is the same on every run
 40 | 	std::uniform_real_distribution<T> dis(-2, 2);
 41 | 
 42 | 	for (int round = 0; round < 100; ++round)
 43 | 	{
 44 | 		// Draw a fresh set of lanes for this round.
 45 | 		std::generate(lanes, lanes + N, [&]() { return dis(rng); });
 46 | 
 47 | 		mipp::reg packed = mipp::loadu<T>(lanes);
 48 | 		const auto val = mipp::hmul<T>(packed);
 49 | 
 50 | 		// Scalar reference, multiplied in T starting from lane 0.
 51 | 		const T res = std::accumulate(lanes + 1, lanes + N, lanes[0], [](T a, T b) { return a * b; });
 52 | 
 53 | 		// Approx tolerates rounding differences (the vector reduction may multiply in another order).
 54 | 		REQUIRE(res == Approx(val));
 55 | 	}
 56 | }
 57 | 
 58 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS))
 59 | TEST_CASE("Horizontal multiplication - mipp::reg", "[mipp::hmul]")
 60 | {
 61 | #ifdef MIPP_64BIT
 62 | 	SECTION("datatype = double") { test_reg_hmul_real<double>(); }
 63 | #endif // MIPP_64BIT
 64 | 	SECTION("datatype = float") { test_reg_hmul_real<float>(); } // only the real types are exercised here
 65 | }
 66 | #endif // !MIPP_NO && !MIPP_SVE_LS
 67 | 
 68 | // Compares mipp::hmul on a mipp::Reg<T> with the scalar product of the same random lanes.
 69 | template <typename T>
 70 | void test_Reg_hmul_int()
 71 | {
 72 | 	constexpr int N = mipp::N<T>();
 73 | 	T inputs1[N];
 74 | 	std::mt19937 g; // default-seeded, so every run draws the same sequence
 75 | 	// uniform_int_distribution is only specified for short/int/long/long long (and unsigned forms),
 76 | 	// not for 8-bit T: draw ints in [-10, 10] (representable by every integer T) and narrow them.
 77 | 	std::uniform_int_distribution<int> dis(-10, 10);
 78 | 	for (auto j = 0; j < 100; j++)
 79 | 	{
 80 | 		for (auto i = 0; i < N; i++)
 81 | 			inputs1[i] = (T)dis(g);
 82 | 
 83 | 		mipp::Reg<T> r1 = inputs1;
 84 | 		auto val = mipp::hmul(r1);
 85 | 		auto res = inputs1[0]; // NOTE(review): with many lanes the product can leave the range of T
 86 | 		for (auto i = 1; i < N; i++)
 87 | 			res *= inputs1[i];
 88 | 		REQUIRE(res == val);
 89 | 	}
 90 | }
 91 | 
 92 | // Compares mipp::hmul on a mipp::Reg<T> with a lane-order scalar product, up to Approx tolerance.
 93 | template <typename T>
 94 | void test_Reg_hmul_real()
 95 | {
 96 | 	constexpr int N = mipp::N<T>();
 97 | 	T lanes[N];
 98 | 	std::mt19937 rng; // default-seeded: the drawn sequence is the same on every run
 99 | 	std::uniform_real_distribution<T> dis(-2, 2);
100 | 
101 | 	for (int round = 0; round < 100; ++round)
102 | 	{
103 | 		// Draw a fresh set of lanes for this round.
104 | 		std::generate(lanes, lanes + N, [&]() { return dis(rng); });
105 | 
106 | 		mipp::Reg<T> packed = lanes;
107 | 		const auto val = mipp::hmul(packed);
108 | 
109 | 		// Scalar reference, multiplied in T starting from lane 0.
110 | 		const T res = std::accumulate(lanes + 1, lanes + N, lanes[0], [](T a, T b) { return a * b; });
111 | 
112 | 		// Approx tolerates rounding differences (the vector reduction may multiply in another order).
113 | 		REQUIRE(res == Approx(val));
114 | 	}
115 | }
116 | 
117 | #if !(defined(MIPP_NO) || defined(MIPP_SVE_LS))
118 | TEST_CASE("Horizontal multiplication - mipp::Reg", "[mipp::hmul]")
119 | {
120 | #ifdef MIPP_64BIT
121 | 	SECTION("datatype = double") { test_Reg_hmul_real<double>(); }
122 | #endif // MIPP_64BIT
123 | 	SECTION("datatype = float") { test_Reg_hmul_real<float>(); } // only the real types are exercised here
124 | }
125 | #endif // !MIPP_NO && !MIPP_SVE_LS
126 | 


--------------------------------------------------------------------------------
/tests/src/static_for.hpp:
--------------------------------------------------------------------------------
 1 | // from https://www.codeproject.com/Articles/857354/Compile-Time-Loops-with-Cplusplus-Creating-a-Gener
 2 | 
 3 | #ifndef STATIC_FOR_HPP_
 4 | #define STATIC_FOR_HPP_
 5 | 
 6 | #include <cstdint>
 7 | 
 8 | #include "static_for.hxx"
 9 | 
10 | // Compile-time loop over the indices 0 .. count-1: forwards the arguments to static_for_impl::loop.
11 | template<size_t count, typename functor, size_t sequence_width = 70, typename... functor_types>
12 | inline void static_for(functor_types&&... functor_args)
13 | {
14 |     using loop_impl = static_for_impl<0, count-1, functor, sequence_width, functor_types...>;
15 |     loop_impl::loop(std::forward<functor_types>(functor_args)...);
16 | }
17 | 
18 | // Compile-time loop over the inclusive range [start, end]: forwards to static_for_impl::loop.
19 | template<size_t start, size_t end, typename functor, size_t sequence_width = 70, typename... functor_types>
20 | inline void static_for(functor_types&&... functor_args)
21 | {
22 |     using loop_impl = static_for_impl<start, end, functor, sequence_width, functor_types...>;
23 |     loop_impl::loop(std::forward<functor_types>(functor_args)...);
24 | }
25 | 
26 | #endif // STATIC_FOR_HPP_


--------------------------------------------------------------------------------
/tests/src/static_for.hxx:
--------------------------------------------------------------------------------
  1 | #include <type_traits>
  2 | #include <utility>
  3 | 
  4 | #include "static_for.hpp"
  5 | 
  6 | template<size_t for_start, size_t for_end, typename functor, size_t sequence_width,
  7 |     typename... functor_types>
  8 | struct static_for_impl // compile-time loop over the inclusive index range [for_start, for_end]
  9 | {
 10 |     static inline void loop(functor_types&&... functor_args)
 11 |     {
 12 |         // Create the root point covering the whole range; "next" then either runs it flat or splits it
 13 |         using sequence = point<for_start, for_end>;
 14 |         next<sequence>
 15 |             (std::integral_constant<bool, sequence::is_end_point_>(), 
 16 |              std::forward<functor_types>(functor_args)...);
 17 |     }
 18 | 
 19 | private:
 20 |     
 21 |     // A point is a node of an n-ary tree; it stands for the inclusive index range [pt_start, pt_end]
 22 |     template<size_t pt_start, size_t pt_end> struct point
 23 |     {
 24 |         static constexpr size_t start_        { pt_start };
 25 |         static constexpr size_t end_          { pt_end };
 26 |         static constexpr size_t count_        { end_ - start_ + 1 }; // number of indices in the range
 27 |         static constexpr bool is_end_point_   { count_ <= sequence_width }; // leaf: small enough to run flat
 28 | 
 29 |         static constexpr size_t sequence_count() // number of children: ceil(count_ / sequence_width), capped at sequence_width
 30 |         {
 31 |             return
 32 |                     points_in_sequence(sequence_width) > sequence_width
 33 |                 ?
 34 |                     sequence_width
 35 |                 :
 36 |                     points_in_sequence(sequence_width);
 37 |         }
 38 | 
 39 |     private:
 40 |         // First (child_start) and last (child_end) index, both inclusive, of child number "index"
 41 |         static constexpr size_t child_start(size_t index)
 42 |         {
 43 |             return
 44 |                     index == 0
 45 |                 ?
 46 |                     pt_start
 47 |                 :
 48 |                     child_end(index - 1) + 1; // starts right after the previous child
 49 |         }
 50 |         static constexpr size_t child_end(size_t index)
 51 |         {
 52 |             return
 53 |                     index == sequence_count() - 1 // the last child always ends on pt_end
 54 |                 ?
 55 |                     pt_end
 56 |                 :
 57 |                     pt_start + points_in_sequence(sequence_count()) * (index + 1) -
 58 |                         (index < count_
 59 |                     ?
 60 |                          1
 61 |                     :
 62 |                          0);
 63 |         }
 64 |         static constexpr size_t points_in_sequence(size_t max) // count_ / max, rounded up
 65 |         {
 66 |             return count_ / max + (
 67 |                     (count_ % max) > 0
 68 |                 ?
 69 |                     1
 70 |                 :
 71 |                     0);
 72 |         }
 73 |            
 74 |     public:
 75 |         // Child node number "index" of this point, generated on demand
 76 |         template<size_t index> using child_point = point<child_start(index), child_end(index)>;
 77 |     };
 78 | 
 79 |     // flat_for is used to instantiate a section of our main static_for::loop
 80 |     // It calls flat_functor::func<i> for every i in the inclusive range [flat_start, flat_end]
 81 |     template<size_t flat_start, size_t flat_end, class flat_functor> struct flat_for
 82 |     {
 83 |         // This is the entry point for flat_for
 84 |         static inline void flat_loop(functor_types&&... functor_args)
 85 |         {
 86 |             flat_next(std::integral_constant<size_t, flat_start>(), 
 87 |                 std::forward<functor_types>(functor_args)...);
 88 |         }
 89 | 
 90 |     private:
 91 |         // Loop termination: this overload is selected once the index reaches flat_end + 1
 92 |         static inline void flat_next
 93 |             (std::integral_constant<size_t, flat_end + 1>, functor_types&&...)
 94 |         {
 95 |         }
 96 |        
 97 |         // Calls flat_functor::func<index>, then recurses on index + 1 (the pack is forwarded to both calls)
 98 |         template<size_t index>
 99 |         static inline void flat_next
100 |             (std::integral_constant<size_t, index>, functor_types&&... functor_args)
101 |         {
102 |             flat_functor::template func<index>(std::forward<functor_types>(functor_args)...);
103 |             flat_next(std::integral_constant<size_t, index + 1>(),
104 |                 std::forward<functor_types>(functor_args)...);
105 |         }
106 |     };
107 | 
108 |     // Functor handed to flat_for when a point is too big: func<index> handles child number "index"
109 |     // It recurses into finer-grained points until the points are no bigger than sequence_width
110 |     template<typename sequence> struct flat_sequence
111 |     {
112 |         template<size_t index> static inline void func(functor_types&&... functor_args)
113 |         {
114 |             using pt = typename sequence::template child_point<index>;
115 |             next<pt>
116 |                 (std::integral_constant<bool, pt::is_end_point_>(),
117 |                  std::forward<functor_types>(functor_args)...);
118 |         }
119 |     };
120 | 
121 |     // The true_type overload is called when the sequence is small enough (is_end_point_):
122 |     // it runs the user-provided functor on every index of the sequence
123 |     template<typename sequence> static inline void next
124 |         (std::true_type, functor_types&&... functor_args)
125 |     {
126 |         flat_for<sequence::start_, sequence::end_, functor>::
127 |             flat_loop(std::forward<functor_types>(functor_args)...);
128 |     }
129 | 
130 |     // The false_type overload is called when the sequence is still too big: it runs an
131 |     // internal flat_for loop over the child points 0 .. sequence_count() - 1
132 |     template<typename sequence> static inline void next
133 |         (std::false_type, functor_types&&... functor_args)
134 |     {
135 |         flat_for<0, sequence::sequence_count() - 1, flat_sequence<sequence>>::
136 |             flat_loop(std::forward<functor_types>(functor_args)...);
137 |     }
138 | };


--------------------------------------------------------------------------------